##// END OF EJS Templates
delta-find: move `_refinedgroups` on the `_DeltaSearch` object...
marmoute -
r52214:c8307440 default
parent child Browse files
Show More
@@ -1,1670 +1,1673 b''
1 # revlogdeltas.py - Logic around delta computation for revlog
1 # revlogdeltas.py - Logic around delta computation for revlog
2 #
2 #
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 # Copyright 2018 Octobus <contact@octobus.net>
4 # Copyright 2018 Octobus <contact@octobus.net>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8 """Helper class to compute deltas stored inside revlogs"""
8 """Helper class to compute deltas stored inside revlogs"""
9
9
10
10
11 import collections
11 import collections
12 import struct
12 import struct
13
13
14 # import stuff from node for others to import from revlog
14 # import stuff from node for others to import from revlog
15 from ..node import nullrev
15 from ..node import nullrev
16 from ..i18n import _
16 from ..i18n import _
17
17
18 from .constants import (
18 from .constants import (
19 COMP_MODE_DEFAULT,
19 COMP_MODE_DEFAULT,
20 COMP_MODE_INLINE,
20 COMP_MODE_INLINE,
21 COMP_MODE_PLAIN,
21 COMP_MODE_PLAIN,
22 DELTA_BASE_REUSE_FORCE,
22 DELTA_BASE_REUSE_FORCE,
23 DELTA_BASE_REUSE_NO,
23 DELTA_BASE_REUSE_NO,
24 KIND_CHANGELOG,
24 KIND_CHANGELOG,
25 KIND_FILELOG,
25 KIND_FILELOG,
26 KIND_MANIFESTLOG,
26 KIND_MANIFESTLOG,
27 REVIDX_ISCENSORED,
27 REVIDX_ISCENSORED,
28 REVIDX_RAWTEXT_CHANGING_FLAGS,
28 REVIDX_RAWTEXT_CHANGING_FLAGS,
29 )
29 )
30
30
31 from ..thirdparty import attr
31 from ..thirdparty import attr
32
32
33 from .. import (
33 from .. import (
34 error,
34 error,
35 mdiff,
35 mdiff,
36 util,
36 util,
37 )
37 )
38
38
39 from . import flagutil
39 from . import flagutil
40
40
41 # maximum <delta-chain-data>/<revision-text-length> ratio
41 # maximum <delta-chain-data>/<revision-text-length> ratio
42 LIMIT_DELTA2TEXT = 2
42 LIMIT_DELTA2TEXT = 2
43
43
44
44
class _testrevlog:
    """minimalist fake revlog to use in doctests"""

    def __init__(self, data, density=0.5, mingap=0, snapshot=()):
        """data is a list of revision payload boundaries"""
        from .. import revlog

        self._data = data
        self.data_config = revlog.DataConfig()
        self.data_config.sr_density_threshold = density
        self.data_config.sr_min_gap_size = mingap
        self.delta_config = revlog.DeltaConfig()
        self.feature_config = revlog.FeatureConfig()
        self._snapshot = set(snapshot)
        self.index = None

    def start(self, rev):
        # both the null revision and revision 0 start at offset zero
        if rev == nullrev or rev == 0:
            return 0
        return self._data[rev - 1]

    def end(self, rev):
        if rev == nullrev:
            return 0
        return self._data[rev]

    def length(self, rev):
        return self.end(rev) - self.start(rev)

    def __len__(self):
        return len(self._data)

    def issnapshot(self, rev):
        # the null revision always counts as a snapshot
        if rev == nullrev:
            return True
        return rev in self._snapshot
83
83
84
84
def slicechunk(revlog, revs, targetsize=None):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `revlog.data_config.sr_density_threshold`. No gap smaller
    than `revlog.data_config.sr_min_gap_size` is skipped.

    If `targetsize` is set, no chunk larger than `targetsize` will be yield.
    For consistency with other slicing choice, this limit won't go lower than
    `revlog.data_config.sr_min_gap_size`.

    If individual revisions chunk are larger than this limit, they will still
    be raised individually.

    >>> data = [
    ...  5,  #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ]
    >>> revlog = _testrevlog(data, snapshot=range(16))

    >>> list(slicechunk(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(slicechunk(revlog, [0, 15]))
    [[0], [15]]
    >>> list(slicechunk(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]

    Slicing with a maximum chunk size
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
    [[0], [11], [13], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
    [[0], [11], [13, 15]]

    Slicing involving nullrev
    >>> list(slicechunk(revlog, [-1, 0, 11, 13, 15], targetsize=20))
    [[-1, 0], [11], [13, 15]]
    >>> list(slicechunk(revlog, [-1, 13, 15], targetsize=5))
    [[-1], [13], [15]]
    """
    data_config = revlog.data_config
    if targetsize is not None:
        targetsize = max(targetsize, data_config.sr_min_gap_size)
    # targetsize should not be specified when evaluating delta candidates:
    # * targetsize is used to ensure we stay within specification when reading,
    density_slicer = getattr(revlog.index, 'slicechunktodensity', None)
    if density_slicer is None:
        # no native implementation available, fall back to the pure-python one
        def density_slicer(r, threshold, min_gap):
            return _slicechunktodensity(revlog, r, threshold, min_gap)

    for chunk in density_slicer(
        revs,
        data_config.sr_density_threshold,
        data_config.sr_min_gap_size,
    ):
        # each density-selected chunk may still exceed the read target size
        yield from _slicechunktosize(revlog, chunk, targetsize)
159
159
160
160
def _slicechunktosize(revlog, revs, targetsize=None):
    """slice revs to match the target size

    This is intended to be used on chunk that density slicing selected by that
    are still too large compared to the read garantee of revlog. This might
    happens when "minimal gap size" interrupted the slicing or when chain are
    built in a way that create large blocks next to each other.

    >>> data = [
    ...  3,  #0 (3)
    ...  5,  #1 (2)
    ...  6,  #2 (1)
    ...  8,  #3 (2)
    ...  8,  #4 (empty)
    ...  11, #5 (3)
    ...  12, #6 (1)
    ...  13, #7 (1)
    ...  14, #8 (1)
    ... ]

    == All snapshots cases ==
    >>> revlog = _testrevlog(data, snapshot=range(9))

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1, 2], [3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]

    == No Snapshot cases ==
    >>> revlog = _testrevlog(data)

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1], [2, 3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [4, 5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]

    == mixed case ==
    >>> revlog = _testrevlog(data, snapshot=[0, 1, 2])
    >>> list(_slicechunktosize(revlog, list(range(9)), 5))
    [[0, 1], [2], [3, 4, 5], [6, 7, 8]]
    """
    assert targetsize is None or 0 <= targetsize
    start_data = revlog.start(revs[0])
    end_data = revlog.end(revs[-1])
    full_span = end_data - start_data
    if targetsize is None or full_span <= targetsize:
        # the whole chunk already fits in the read target
        yield revs
        return

    start_idx = 0
    end_idx = 1
    rev_iter = enumerate(revs)
    next(rev_iter)  # skip first rev.
    # first step: get snapshots out of the way
    for idx, rev in rev_iter:
        span = revlog.end(rev) - start_data
        is_snapshot = revlog.issnapshot(rev)
        if span <= targetsize and is_snapshot:
            # still within budget; extend the current chunk
            end_idx = idx + 1
        else:
            chunk = _trimchunk(revlog, revs, start_idx, end_idx)
            if chunk:
                yield chunk
            start_idx = idx
            start_data = revlog.start(rev)
            end_idx = idx + 1
            if not is_snapshot:
                break

    # for the others, we use binary slicing to quickly converge toward valid
    # chunks (otherwise, we might end up looking for start/end of many
    # revisions). This logic is not looking for the perfect slicing point, it
    # focuses on quickly converging toward valid chunks.
    nb_item = len(revs)
    while (end_data - start_data) > targetsize:
        end_idx = nb_item
        if nb_item - start_idx <= 1:
            break  # protect against individual chunk larger than limit
        local_end = revlog.end(revs[end_idx - 1])
        span = local_end - start_data
        while span > targetsize:
            if end_idx - start_idx <= 1:
                break  # protect against individual chunk larger than limit
            # halve the remaining window and re-measure the span
            end_idx -= (end_idx - start_idx) // 2
            local_end = revlog.end(revs[end_idx - 1])
            span = local_end - start_data
        chunk = _trimchunk(revlog, revs, start_idx, end_idx)
        if chunk:
            yield chunk
        start_idx = end_idx
        start_data = revlog.start(revs[start_idx])

    chunk = _trimchunk(revlog, revs, start_idx)
    if chunk:
        yield chunk
314
314
315
315
def _slicechunktodensity(revlog, revs, targetdensity=0.5, mingapsize=0):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
    skipped.

    >>> revlog = _testrevlog([
    ...  5,  #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ])

    >>> list(_slicechunktodensity(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(_slicechunktodensity(revlog, [0, 15]))
    [[0], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           mingapsize=20))
    [[1, 2, 3, 5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95))
    [[1, 2], [5], [8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95, mingapsize=12))
    [[1, 2], [5, 8, 10, 11], [14]]
    """
    # bind the accessors once; they are called for every revision below
    start = revlog.start
    length = revlog.length

    if len(revs) <= 1:
        yield revs
        return

    deltachainspan = segmentspan(revlog, revs)

    if deltachainspan < mingapsize:
        # the whole span is smaller than the minimum gap, never split it
        yield revs
        return

    readdata = deltachainspan
    chainpayload = sum(length(r) for r in revs)

    density = chainpayload / float(deltachainspan) if deltachainspan else 1.0

    if density >= targetdensity:
        yield revs
        return

    # Record every qualifying gap as a (size, index-in-revs) pair
    gaps = []
    prev_end = None
    for i, rev in enumerate(revs):
        rev_start = start(rev)
        rev_len = length(rev)

        # Skip empty revisions to form larger holes
        if rev_len == 0:
            continue

        if prev_end is not None:
            gap_size = rev_start - prev_end
            # only consider holes that are large enough
            if gap_size > mingapsize:
                gaps.append((gap_size, i))

        prev_end = rev_start + rev_len
    # sort so that pop() hands back the largest gap first
    gaps.sort()

    # Collect the indices of the largest holes until the density is acceptable
    selected = []
    while gaps and density < targetdensity:
        gap_size, gap_idx = gaps.pop()

        selected.append(gap_idx)

        # skipping this gap shrinks the amount of data actually read,
        # which raises the effective density
        readdata -= gap_size
        if readdata > 0:
            density = chainpayload / float(readdata)
        else:
            density = 1.0
    selected.sort()

    # Cut the revs at collected indices
    prev_idx = 0
    for cut_idx in selected:
        chunk = _trimchunk(revlog, revs, prev_idx, cut_idx)
        if chunk:
            yield chunk

        prev_idx = cut_idx

    chunk = _trimchunk(revlog, revs, prev_idx)
    if chunk:
        yield chunk
440
440
441
441
def _trimchunk(revlog, revs, startidx, endidx=None):
    """returns revs[startidx:endidx] without empty trailing revs

    Doctest Setup
    >>> revlog = _testrevlog([
    ...  5,  #0
    ...  10, #1
    ...  12, #2
    ...  12, #3 (empty)
    ...  17, #4
    ...  21, #5
    ...  21, #6 (empty)
    ... ])

    Contiguous cases:
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
    [0, 1, 2, 3, 4, 5]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
    [0, 1, 2, 3, 4]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
    [0, 1, 2]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
    [2]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
    [3, 4, 5]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
    [3, 4]

    Discontiguous cases:
    >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
    [1, 3, 5]
    >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
    [1]
    >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
    [3, 5]
    >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
    [3, 5]
    """
    rev_length = revlog.length

    if endidx is None:
        endidx = len(revs)

    # A trailing rev >= len(revlog) is a not-yet-stored delta candidate and
    # is never empty, so there is nothing to trim in that case.
    if revs[endidx - 1] < len(revlog):
        # Drop zero-length revisions from the tail, but never trim away the
        # very first revision of a chain.
        while endidx > max(1, startidx) and rev_length(revs[endidx - 1]) == 0:
            endidx -= 1

    return revs[startidx:endidx]
494
494
495
495
def segmentspan(revlog, revs):
    """Get the byte span of a segment of revisions

    revs is a sorted array of revision numbers

    >>> revlog = _testrevlog([
    ...  5,  #0
    ...  10, #1
    ...  12, #2
    ...  12, #3 (empty)
    ...  17, #4
    ... ])

    >>> segmentspan(revlog, [0, 1, 2, 3, 4])
    17
    >>> segmentspan(revlog, [0, 4])
    17
    >>> segmentspan(revlog, [3, 4])
    5
    >>> segmentspan(revlog, [1, 2, 3,])
    7
    >>> segmentspan(revlog, [1, 3])
    7
    """
    # an empty segment covers no bytes
    if not revs:
        return 0
    # span runs from the first revision's start to the last revision's end
    return revlog.end(revs[-1]) - revlog.start(revs[0])
524
524
525
525
def _textfromdelta(revlog, baserev, delta, p1, p2, flags, expectednode):
    """build full text from a (base, delta) pair and other metadata"""
    # special case deltas which replace entire base; no need to decode
    # base revision. this neatly avoids censored bases, which throw when
    # they're decoded.
    header_len = struct.calcsize(b">lll")
    replace_header = mdiff.replacediffheader(
        revlog.rawsize(baserev), len(delta) - header_len
    )
    if delta[:header_len] == replace_header:
        # the delta replaces the whole base: the payload IS the fulltext
        fulltext = delta[header_len:]
    else:
        # deltabase is rawtext before changed by flag processors, which is
        # equivalent to non-raw text
        basetext = revlog.revision(baserev)
        fulltext = mdiff.patch(basetext, delta)

    try:
        validatehash = flagutil.processflagsraw(revlog, fulltext, flags)
        if validatehash:
            revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
        if flags & REVIDX_ISCENSORED:
            raise error.StorageError(
                _(b'node %s is not censored') % expectednode
            )
    except error.CensoredNodeError:
        # must pass the censored index flag to add censored revisions
        if not flags & REVIDX_ISCENSORED:
            raise
    return fulltext
555
555
556
556
557 @attr.s(slots=True, frozen=True)
557 @attr.s(slots=True, frozen=True)
558 class _deltainfo:
558 class _deltainfo:
559 distance = attr.ib()
559 distance = attr.ib()
560 deltalen = attr.ib()
560 deltalen = attr.ib()
561 data = attr.ib()
561 data = attr.ib()
562 base = attr.ib()
562 base = attr.ib()
563 chainbase = attr.ib()
563 chainbase = attr.ib()
564 chainlen = attr.ib()
564 chainlen = attr.ib()
565 compresseddeltalen = attr.ib()
565 compresseddeltalen = attr.ib()
566 snapshotdepth = attr.ib()
566 snapshotdepth = attr.ib()
567
567
568
568
569 def drop_u_compression(delta):
569 def drop_u_compression(delta):
570 """turn into a "u" (no-compression) into no-compression without header
570 """turn into a "u" (no-compression) into no-compression without header
571
571
572 This is useful for revlog format that has better compression method.
572 This is useful for revlog format that has better compression method.
573 """
573 """
574 assert delta.data[0] == b'u', delta.data[0]
574 assert delta.data[0] == b'u', delta.data[0]
575 return _deltainfo(
575 return _deltainfo(
576 delta.distance,
576 delta.distance,
577 delta.deltalen - 1,
577 delta.deltalen - 1,
578 (b'', delta.data[1]),
578 (b'', delta.data[1]),
579 delta.base,
579 delta.base,
580 delta.chainbase,
580 delta.chainbase,
581 delta.chainlen,
581 delta.chainlen,
582 delta.compresseddeltalen,
582 delta.compresseddeltalen,
583 delta.snapshotdepth,
583 delta.snapshotdepth,
584 )
584 )
585
585
586
586
587 def is_good_delta_info(revlog, deltainfo, revinfo):
587 def is_good_delta_info(revlog, deltainfo, revinfo):
588 """Returns True if the given delta is good. Good means that it is within
588 """Returns True if the given delta is good. Good means that it is within
589 the disk span, disk size, and chain length bounds that we know to be
589 the disk span, disk size, and chain length bounds that we know to be
590 performant."""
590 performant."""
591 if deltainfo is None:
591 if deltainfo is None:
592 return False
592 return False
593
593
594 # the DELTA_BASE_REUSE_FORCE case should have been taken care of sooner so
594 # the DELTA_BASE_REUSE_FORCE case should have been taken care of sooner so
595 # we should never end up asking such question. Adding the assert as a
595 # we should never end up asking such question. Adding the assert as a
596 # safe-guard to detect anything that would be fishy in this regard.
596 # safe-guard to detect anything that would be fishy in this regard.
597 assert (
597 assert (
598 revinfo.cachedelta is None
598 revinfo.cachedelta is None
599 or revinfo.cachedelta[2] != DELTA_BASE_REUSE_FORCE
599 or revinfo.cachedelta[2] != DELTA_BASE_REUSE_FORCE
600 or not revlog.delta_config.general_delta
600 or not revlog.delta_config.general_delta
601 )
601 )
602
602
603 # - 'deltainfo.distance' is the distance from the base revision --
603 # - 'deltainfo.distance' is the distance from the base revision --
604 # bounding it limits the amount of I/O we need to do.
604 # bounding it limits the amount of I/O we need to do.
605 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
605 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
606 # deltas we need to apply -- bounding it limits the amount of CPU
606 # deltas we need to apply -- bounding it limits the amount of CPU
607 # we consume.
607 # we consume.
608
608
609 textlen = revinfo.textlen
609 textlen = revinfo.textlen
610 defaultmax = textlen * 4
610 defaultmax = textlen * 4
611 maxdist = revlog.delta_config.max_deltachain_span
611 maxdist = revlog.delta_config.max_deltachain_span
612 if not maxdist:
612 if not maxdist:
613 maxdist = deltainfo.distance # ensure the conditional pass
613 maxdist = deltainfo.distance # ensure the conditional pass
614 maxdist = max(maxdist, defaultmax)
614 maxdist = max(maxdist, defaultmax)
615
615
616 # Bad delta from read span:
616 # Bad delta from read span:
617 #
617 #
618 # If the span of data read is larger than the maximum allowed.
618 # If the span of data read is larger than the maximum allowed.
619 #
619 #
620 # In the sparse-revlog case, we rely on the associated "sparse reading"
620 # In the sparse-revlog case, we rely on the associated "sparse reading"
621 # to avoid issue related to the span of data. In theory, it would be
621 # to avoid issue related to the span of data. In theory, it would be
622 # possible to build pathological revlog where delta pattern would lead
622 # possible to build pathological revlog where delta pattern would lead
623 # to too many reads. However, they do not happen in practice at all. So
623 # to too many reads. However, they do not happen in practice at all. So
624 # we skip the span check entirely.
624 # we skip the span check entirely.
625 if not revlog.delta_config.sparse_revlog and maxdist < deltainfo.distance:
625 if not revlog.delta_config.sparse_revlog and maxdist < deltainfo.distance:
626 return False
626 return False
627
627
628 # Bad delta from new delta size:
628 # Bad delta from new delta size:
629 #
629 #
630 # If the delta size is larger than the target text, storing the
630 # If the delta size is larger than the target text, storing the
631 # delta will be inefficient.
631 # delta will be inefficient.
632 if textlen < deltainfo.deltalen:
632 if textlen < deltainfo.deltalen:
633 return False
633 return False
634
634
635 # Bad delta from cumulated payload size:
635 # Bad delta from cumulated payload size:
636 #
636 #
637 # If the sum of delta get larger than K * target text length.
637 # If the sum of delta get larger than K * target text length.
638 if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
638 if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
639 return False
639 return False
640
640
641 # Bad delta from chain length:
641 # Bad delta from chain length:
642 #
642 #
643 # If the number of delta in the chain gets too high.
643 # If the number of delta in the chain gets too high.
644 if (
644 if (
645 revlog.delta_config.max_chain_len
645 revlog.delta_config.max_chain_len
646 and revlog.delta_config.max_chain_len < deltainfo.chainlen
646 and revlog.delta_config.max_chain_len < deltainfo.chainlen
647 ):
647 ):
648 return False
648 return False
649
649
650 # bad delta from intermediate snapshot size limit
650 # bad delta from intermediate snapshot size limit
651 #
651 #
652 # If an intermediate snapshot size is higher than the limit. The
652 # If an intermediate snapshot size is higher than the limit. The
653 # limit exist to prevent endless chain of intermediate delta to be
653 # limit exist to prevent endless chain of intermediate delta to be
654 # created.
654 # created.
655 if (
655 if (
656 deltainfo.snapshotdepth is not None
656 deltainfo.snapshotdepth is not None
657 and (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen
657 and (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen
658 ):
658 ):
659 return False
659 return False
660
660
661 # bad delta if new intermediate snapshot is larger than the previous
661 # bad delta if new intermediate snapshot is larger than the previous
662 # snapshot
662 # snapshot
663 if (
663 if (
664 deltainfo.snapshotdepth
664 deltainfo.snapshotdepth
665 and revlog.length(deltainfo.base) < deltainfo.deltalen
665 and revlog.length(deltainfo.base) < deltainfo.deltalen
666 ):
666 ):
667 return False
667 return False
668
668
669 return True
669 return True
670
670
671
671
672 # If a revision's full text is that much bigger than a base candidate full
672 # If a revision's full text is that much bigger than a base candidate full
673 # text's, it is very unlikely that it will produce a valid delta. We no longer
673 # text's, it is very unlikely that it will produce a valid delta. We no longer
674 # consider these candidates.
674 # consider these candidates.
675 LIMIT_BASE2TEXT = 500
675 LIMIT_BASE2TEXT = 500
676
676
677
677
678 class _DeltaSearch:
678 class _DeltaSearch:
679 """perform the search of a good delta for a single revlog revision
679 """perform the search of a good delta for a single revlog revision
680
680
681 note: some of the deltacomputer.finddeltainfo logic should probably move
681 note: some of the deltacomputer.finddeltainfo logic should probably move
682 here.
682 here.
683 """
683 """
684
684
685 def __init__(
685 def __init__(
686 self,
686 self,
687 revlog,
687 revlog,
688 textlen,
688 textlen,
689 p1,
689 p1,
690 p2,
690 p2,
691 cachedelta,
691 cachedelta,
692 excluded_bases=None,
692 excluded_bases=None,
693 target_rev=None,
693 target_rev=None,
694 snapshot_cache=None,
694 snapshot_cache=None,
695 ):
695 ):
696 self.revlog = revlog
696 self.revlog = revlog
697 self.textlen = textlen
697 self.textlen = textlen
698 self.p1 = p1
698 self.p1 = p1
699 self.p2 = p2
699 self.p2 = p2
700 self.cachedelta = cachedelta
700 self.cachedelta = cachedelta
701 self.excluded_bases = excluded_bases
701 self.excluded_bases = excluded_bases
702 self.target_rev = target_rev
702 self.target_rev = target_rev
703 self.snapshot_cache = snapshot_cache
703 self.snapshot_cache = snapshot_cache
704
704
705 def candidate_groups(self):
705 def candidate_groups(self):
706 """Provides group of revision to be tested as delta base
706 """Provides group of revision to be tested as delta base
707
707
708 This top level function focus on emitting groups with unique and
708 This top level function focus on emitting groups with unique and
709 worthwhile content. See _raw_candidate_groups for details about the
709 worthwhile content. See _raw_candidate_groups for details about the
710 group order.
710 group order.
711 """
711 """
712 # should we try to build a delta?
712 # should we try to build a delta?
713 if not (len(self.revlog) and self.revlog._storedeltachains):
713 if not (len(self.revlog) and self.revlog._storedeltachains):
714 yield None
714 yield None
715 return
715 return
716
716
717 if self.target_rev is None:
717 if self.target_rev is None:
718 self.target_rev = len(self.revlog)
718 self.target_rev = len(self.revlog)
719
719
720 if not self.revlog.delta_config.general_delta:
720 if not self.revlog.delta_config.general_delta:
721 # before general delta, there is only one possible delta base
721 # before general delta, there is only one possible delta base
722 yield (self.target_rev - 1,)
722 yield (self.target_rev - 1,)
723 yield None
723 yield None
724 return
724 return
725
725
726 # the DELTA_BASE_REUSE_FORCE case should have been taken care of sooner
726 # the DELTA_BASE_REUSE_FORCE case should have been taken care of sooner
727 # so we should never end up asking such question. Adding the assert as
727 # so we should never end up asking such question. Adding the assert as
728 # a safe-guard to detect anything that would be fishy in this regard.
728 # a safe-guard to detect anything that would be fishy in this regard.
729 assert (
729 assert (
730 self.cachedelta is None
730 self.cachedelta is None
731 or self.cachedelta[2] != DELTA_BASE_REUSE_FORCE
731 or self.cachedelta[2] != DELTA_BASE_REUSE_FORCE
732 or not self.revlog.delta_config.general_delta
732 or not self.revlog.delta_config.general_delta
733 )
733 )
734
734
735 deltalength = self.revlog.length
735 deltalength = self.revlog.length
736 deltaparent = self.revlog.deltaparent
736 deltaparent = self.revlog.deltaparent
737 sparse = self.revlog.delta_config.sparse_revlog
737 sparse = self.revlog.delta_config.sparse_revlog
738 good = None
738 good = None
739
739
740 deltas_limit = self.textlen * LIMIT_DELTA2TEXT
740 deltas_limit = self.textlen * LIMIT_DELTA2TEXT
741 group_chunk_size = self.revlog.delta_config.candidate_group_chunk_size
741 group_chunk_size = self.revlog.delta_config.candidate_group_chunk_size
742
742
743 tested = {nullrev}
743 tested = {nullrev}
744 candidates = _refinedgroups(
744 candidates = self._refined_groups()
745 self.revlog,
746 self.p1,
747 self.p2,
748 self.cachedelta,
749 snapshot_cache=self.snapshot_cache,
750 )
751 while True:
745 while True:
752 temptative = candidates.send(good)
746 temptative = candidates.send(good)
753 if temptative is None:
747 if temptative is None:
754 break
748 break
755 group = []
749 group = []
756 for rev in temptative:
750 for rev in temptative:
757 # skip over empty delta (no need to include them in a chain)
751 # skip over empty delta (no need to include them in a chain)
758 while not (rev == nullrev or rev in tested or deltalength(rev)):
752 while not (rev == nullrev or rev in tested or deltalength(rev)):
759 tested.add(rev)
753 tested.add(rev)
760 rev = deltaparent(rev)
754 rev = deltaparent(rev)
761 # no need to try a delta against nullrev, this will be done as
755 # no need to try a delta against nullrev, this will be done as
762 # a last resort.
756 # a last resort.
763 if rev == nullrev:
757 if rev == nullrev:
764 continue
758 continue
765 # filter out revision we tested already
759 # filter out revision we tested already
766 if rev in tested:
760 if rev in tested:
767 continue
761 continue
768
762
769 # an higher authority deamed the base unworthy (e.g. censored)
763 # an higher authority deamed the base unworthy (e.g. censored)
770 if (
764 if (
771 self.excluded_bases is not None
765 self.excluded_bases is not None
772 and rev in self.excluded_bases
766 and rev in self.excluded_bases
773 ):
767 ):
774 tested.add(rev)
768 tested.add(rev)
775 continue
769 continue
776 # We are in some recomputation cases and that rev is too high
770 # We are in some recomputation cases and that rev is too high
777 # in the revlog
771 # in the revlog
778 if self.target_rev is not None and rev >= self.target_rev:
772 if self.target_rev is not None and rev >= self.target_rev:
779 tested.add(rev)
773 tested.add(rev)
780 continue
774 continue
781 # filter out delta base that will never produce good delta
775 # filter out delta base that will never produce good delta
782 if deltas_limit < self.revlog.length(rev):
776 if deltas_limit < self.revlog.length(rev):
783 tested.add(rev)
777 tested.add(rev)
784 continue
778 continue
785 if sparse and self.revlog.rawsize(rev) < (
779 if sparse and self.revlog.rawsize(rev) < (
786 self.textlen // LIMIT_BASE2TEXT
780 self.textlen // LIMIT_BASE2TEXT
787 ):
781 ):
788 tested.add(rev)
782 tested.add(rev)
789 continue
783 continue
790 # no delta for rawtext-changing revs (see "candelta" for why)
784 # no delta for rawtext-changing revs (see "candelta" for why)
791 if self.revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
785 if self.revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
792 tested.add(rev)
786 tested.add(rev)
793 continue
787 continue
794
788
795 # If we reach here, we are about to build and test a delta.
789 # If we reach here, we are about to build and test a delta.
796 # The delta building process will compute the chaininfo in all
790 # The delta building process will compute the chaininfo in all
797 # case, since that computation is cached, it is fine to access
791 # case, since that computation is cached, it is fine to access
798 # it here too.
792 # it here too.
799 chainlen, chainsize = self.revlog._chaininfo(rev)
793 chainlen, chainsize = self.revlog._chaininfo(rev)
800 # if chain will be too long, skip base
794 # if chain will be too long, skip base
801 if (
795 if (
802 self.revlog.delta_config.max_chain_len
796 self.revlog.delta_config.max_chain_len
803 and chainlen >= self.revlog.delta_config.max_chain_len
797 and chainlen >= self.revlog.delta_config.max_chain_len
804 ):
798 ):
805 tested.add(rev)
799 tested.add(rev)
806 continue
800 continue
807 # if chain already have too much data, skip base
801 # if chain already have too much data, skip base
808 if deltas_limit < chainsize:
802 if deltas_limit < chainsize:
809 tested.add(rev)
803 tested.add(rev)
810 continue
804 continue
811 if (
805 if (
812 sparse
806 sparse
813 and self.revlog.delta_config.upper_bound_comp is not None
807 and self.revlog.delta_config.upper_bound_comp is not None
814 ):
808 ):
815 maxcomp = self.revlog.delta_config.upper_bound_comp
809 maxcomp = self.revlog.delta_config.upper_bound_comp
816 basenotsnap = (self.p1, self.p2, nullrev)
810 basenotsnap = (self.p1, self.p2, nullrev)
817 if rev not in basenotsnap and self.revlog.issnapshot(rev):
811 if rev not in basenotsnap and self.revlog.issnapshot(rev):
818 snapshotdepth = self.revlog.snapshotdepth(rev)
812 snapshotdepth = self.revlog.snapshotdepth(rev)
819 # If text is significantly larger than the base, we can
813 # If text is significantly larger than the base, we can
820 # expect the resulting delta to be proportional to the size
814 # expect the resulting delta to be proportional to the size
821 # difference
815 # difference
822 revsize = self.revlog.rawsize(rev)
816 revsize = self.revlog.rawsize(rev)
823 rawsizedistance = max(self.textlen - revsize, 0)
817 rawsizedistance = max(self.textlen - revsize, 0)
824 # use an estimate of the compression upper bound.
818 # use an estimate of the compression upper bound.
825 lowestrealisticdeltalen = rawsizedistance // maxcomp
819 lowestrealisticdeltalen = rawsizedistance // maxcomp
826
820
827 # check the absolute constraint on the delta size
821 # check the absolute constraint on the delta size
828 snapshotlimit = self.textlen >> snapshotdepth
822 snapshotlimit = self.textlen >> snapshotdepth
829 if snapshotlimit < lowestrealisticdeltalen:
823 if snapshotlimit < lowestrealisticdeltalen:
830 # delta lower bound is larger than accepted upper
824 # delta lower bound is larger than accepted upper
831 # bound
825 # bound
832 tested.add(rev)
826 tested.add(rev)
833 continue
827 continue
834
828
835 # check the relative constraint on the delta size
829 # check the relative constraint on the delta size
836 revlength = self.revlog.length(rev)
830 revlength = self.revlog.length(rev)
837 if revlength < lowestrealisticdeltalen:
831 if revlength < lowestrealisticdeltalen:
838 # delta probable lower bound is larger than target
832 # delta probable lower bound is larger than target
839 # base
833 # base
840 tested.add(rev)
834 tested.add(rev)
841 continue
835 continue
842
836
843 group.append(rev)
837 group.append(rev)
844 if group:
838 if group:
845 # When the size of the candidate group is big, it can result in
839 # When the size of the candidate group is big, it can result in
846 # a quite significant performance impact. To reduce this, we
840 # a quite significant performance impact. To reduce this, we
847 # can send them in smaller batches until the new batch does not
841 # can send them in smaller batches until the new batch does not
848 # provide any improvements.
842 # provide any improvements.
849 #
843 #
850 # This might reduce the overall efficiency of the compression
844 # This might reduce the overall efficiency of the compression
851 # in some corner cases, but that should also prevent very
845 # in some corner cases, but that should also prevent very
852 # pathological cases from being an issue. (eg. 20 000
846 # pathological cases from being an issue. (eg. 20 000
853 # candidates).
847 # candidates).
854 #
848 #
855 # XXX note that the ordering of the group becomes important as
849 # XXX note that the ordering of the group becomes important as
856 # it now impacts the final result. The current order is
850 # it now impacts the final result. The current order is
857 # unprocessed and can be improved.
851 # unprocessed and can be improved.
858 if group_chunk_size == 0:
852 if group_chunk_size == 0:
859 tested.update(group)
853 tested.update(group)
860 good = yield tuple(group)
854 good = yield tuple(group)
861 else:
855 else:
862 prev_good = good
856 prev_good = good
863 for start in range(0, len(group), group_chunk_size):
857 for start in range(0, len(group), group_chunk_size):
864 sub_group = group[start : start + group_chunk_size]
858 sub_group = group[start : start + group_chunk_size]
865 tested.update(sub_group)
859 tested.update(sub_group)
866 good = yield tuple(sub_group)
860 good = yield tuple(sub_group)
867 if prev_good == good:
861 if prev_good == good:
868 break
862 break
869
863
870 yield None
864 yield None
871
865
866 def _refined_groups(self):
867 good = None
868 # First we try to reuse a the delta contained in the bundle. (or from
869 # the source revlog)
870 #
871 # This logic only applies to general delta repositories and can be
872 # disabled through configuration. Disabling reuse source delta is
873 # useful when we want to make sure we recomputed "optimal" deltas.
874 debug_info = None
875 if (
876 self.cachedelta is not None
877 and self.cachedelta[2] > DELTA_BASE_REUSE_NO
878 ):
879 # Assume what we received from the server is a good choice
880 # build delta will reuse the cache
881 if debug_info is not None:
882 debug_info['cached-delta.tested'] += 1
883 good = yield (self.cachedelta[0],)
884 if good is not None:
885 if debug_info is not None:
886 debug_info['cached-delta.accepted'] += 1
887 yield None
888 return
889 if self.snapshot_cache is None:
890 self.snapshot_cache = SnapshotCache()
891 groups = _rawgroups(
892 self.revlog,
893 self.p1,
894 self.p2,
895 self.cachedelta,
896 self.snapshot_cache,
897 )
898 for candidates in groups:
899 good = yield candidates
900 if good is not None:
901 break
872
902
873 def _refinedgroups(revlog, p1, p2, cachedelta, snapshot_cache=None):
903 # If sparse revlog is enabled, we can try to refine the available
874 good = None
904 # deltas
875 # First we try to reuse a the delta contained in the bundle.
905 if not self.revlog.delta_config.sparse_revlog:
876 # (or from the source revlog)
877 #
878 # This logic only applies to general delta repositories and can be disabled
879 # through configuration. Disabling reuse source delta is useful when
880 # we want to make sure we recomputed "optimal" deltas.
881 debug_info = None
882 if cachedelta is not None and cachedelta[2] > DELTA_BASE_REUSE_NO:
883 # Assume what we received from the server is a good choice
884 # build delta will reuse the cache
885 if debug_info is not None:
886 debug_info['cached-delta.tested'] += 1
887 good = yield (cachedelta[0],)
888 if good is not None:
889 if debug_info is not None:
890 debug_info['cached-delta.accepted'] += 1
891 yield None
906 yield None
892 return
907 return
893 if snapshot_cache is None:
894 snapshot_cache = SnapshotCache()
895 groups = _rawgroups(
896 revlog,
897 p1,
898 p2,
899 cachedelta,
900 snapshot_cache,
901 )
902 for candidates in groups:
903 good = yield candidates
904 if good is not None:
905 break
906
907 # If sparse revlog is enabled, we can try to refine the available deltas
908 if not revlog.delta_config.sparse_revlog:
909 yield None
910 return
911
908
912 # if we have a refinable value, try to refine it
909 # if we have a refinable value, try to refine it
913 if good is not None and good not in (p1, p2) and revlog.issnapshot(good):
910 if (
914 # refine snapshot down
911 good is not None
915 previous = None
912 and good not in (self.p1, self.p2)
916 while previous != good:
913 and self.revlog.issnapshot(good)
917 previous = good
914 ):
918 base = revlog.deltaparent(good)
915 # refine snapshot down
919 if base == nullrev:
916 previous = None
920 break
917 while previous != good:
921 good = yield (base,)
918 previous = good
922 # refine snapshot up
919 base = self.revlog.deltaparent(good)
923 if not snapshot_cache.snapshots:
920 if base == nullrev:
924 snapshot_cache.update(revlog, good + 1)
921 break
925 previous = None
922 good = yield (base,)
926 while good != previous:
923 # refine snapshot up
927 previous = good
924 if not self.snapshot_cache.snapshots:
928 children = tuple(sorted(c for c in snapshot_cache.snapshots[good]))
925 self.snapshot_cache.update(self.revlog, good + 1)
929 good = yield children
926 previous = None
927 while good != previous:
928 previous = good
929 children = tuple(
930 sorted(c for c in self.snapshot_cache.snapshots[good])
931 )
932 good = yield children
930
933
931 if debug_info is not None:
934 if debug_info is not None:
932 if good is None:
935 if good is None:
933 debug_info['no-solution'] += 1
936 debug_info['no-solution'] += 1
934
937
935 yield None
938 yield None
936
939
937
940
938 def _rawgroups(revlog, p1, p2, cachedelta, snapshot_cache=None):
941 def _rawgroups(revlog, p1, p2, cachedelta, snapshot_cache=None):
939 """Provides group of revision to be tested as delta base
942 """Provides group of revision to be tested as delta base
940
943
941 This lower level function focus on emitting delta theorically interresting
944 This lower level function focus on emitting delta theorically interresting
942 without looking it any practical details.
945 without looking it any practical details.
943
946
944 The group order aims at providing fast or small candidates first.
947 The group order aims at providing fast or small candidates first.
945 """
948 """
946 # Why search for delta base if we cannot use a delta base ?
949 # Why search for delta base if we cannot use a delta base ?
947 assert revlog.delta_config.general_delta
950 assert revlog.delta_config.general_delta
948 # also see issue6056
951 # also see issue6056
949 sparse = revlog.delta_config.sparse_revlog
952 sparse = revlog.delta_config.sparse_revlog
950 curr = len(revlog)
953 curr = len(revlog)
951 prev = curr - 1
954 prev = curr - 1
952 deltachain = lambda rev: revlog._deltachain(rev)[0]
955 deltachain = lambda rev: revlog._deltachain(rev)[0]
953
956
954 # exclude already lazy tested base if any
957 # exclude already lazy tested base if any
955 parents = [p for p in (p1, p2) if p != nullrev]
958 parents = [p for p in (p1, p2) if p != nullrev]
956
959
957 if not revlog.delta_config.delta_both_parents and len(parents) == 2:
960 if not revlog.delta_config.delta_both_parents and len(parents) == 2:
958 parents.sort()
961 parents.sort()
959 # To minimize the chance of having to build a fulltext,
962 # To minimize the chance of having to build a fulltext,
960 # pick first whichever parent is closest to us (max rev)
963 # pick first whichever parent is closest to us (max rev)
961 yield (parents[1],)
964 yield (parents[1],)
962 # then the other one (min rev) if the first did not fit
965 # then the other one (min rev) if the first did not fit
963 yield (parents[0],)
966 yield (parents[0],)
964 elif len(parents) > 0:
967 elif len(parents) > 0:
965 # Test all parents (1 or 2), and keep the best candidate
968 # Test all parents (1 or 2), and keep the best candidate
966 yield parents
969 yield parents
967
970
968 if sparse and parents:
971 if sparse and parents:
969 if snapshot_cache is None:
972 if snapshot_cache is None:
970 # map: base-rev: [snapshot-revs]
973 # map: base-rev: [snapshot-revs]
971 snapshot_cache = SnapshotCache()
974 snapshot_cache = SnapshotCache()
972 # See if we can use an existing snapshot in the parent chains to use as
975 # See if we can use an existing snapshot in the parent chains to use as
973 # a base for a new intermediate-snapshot
976 # a base for a new intermediate-snapshot
974 #
977 #
975 # search for snapshot in parents delta chain
978 # search for snapshot in parents delta chain
976 # map: snapshot-level: snapshot-rev
979 # map: snapshot-level: snapshot-rev
977 parents_snaps = collections.defaultdict(set)
980 parents_snaps = collections.defaultdict(set)
978 candidate_chains = [deltachain(p) for p in parents]
981 candidate_chains = [deltachain(p) for p in parents]
979 for chain in candidate_chains:
982 for chain in candidate_chains:
980 for idx, s in enumerate(chain):
983 for idx, s in enumerate(chain):
981 if not revlog.issnapshot(s):
984 if not revlog.issnapshot(s):
982 break
985 break
983 parents_snaps[idx].add(s)
986 parents_snaps[idx].add(s)
984 snapfloor = min(parents_snaps[0]) + 1
987 snapfloor = min(parents_snaps[0]) + 1
985 snapshot_cache.update(revlog, snapfloor)
988 snapshot_cache.update(revlog, snapfloor)
986 # search for the highest "unrelated" revision
989 # search for the highest "unrelated" revision
987 #
990 #
988 # Adding snapshots used by "unrelated" revision increase the odd we
991 # Adding snapshots used by "unrelated" revision increase the odd we
989 # reuse an independant, yet better snapshot chain.
992 # reuse an independant, yet better snapshot chain.
990 #
993 #
991 # XXX instead of building a set of revisions, we could lazily enumerate
994 # XXX instead of building a set of revisions, we could lazily enumerate
992 # over the chains. That would be more efficient, however we stick to
995 # over the chains. That would be more efficient, however we stick to
993 # simple code for now.
996 # simple code for now.
994 all_revs = set()
997 all_revs = set()
995 for chain in candidate_chains:
998 for chain in candidate_chains:
996 all_revs.update(chain)
999 all_revs.update(chain)
997 other = None
1000 other = None
998 for r in revlog.revs(prev, snapfloor):
1001 for r in revlog.revs(prev, snapfloor):
999 if r not in all_revs:
1002 if r not in all_revs:
1000 other = r
1003 other = r
1001 break
1004 break
1002 if other is not None:
1005 if other is not None:
1003 # To avoid unfair competition, we won't use unrelated intermediate
1006 # To avoid unfair competition, we won't use unrelated intermediate
1004 # snapshot that are deeper than the ones from the parent delta
1007 # snapshot that are deeper than the ones from the parent delta
1005 # chain.
1008 # chain.
1006 max_depth = max(parents_snaps.keys())
1009 max_depth = max(parents_snaps.keys())
1007 chain = deltachain(other)
1010 chain = deltachain(other)
1008 for depth, s in enumerate(chain):
1011 for depth, s in enumerate(chain):
1009 if s < snapfloor:
1012 if s < snapfloor:
1010 continue
1013 continue
1011 if max_depth < depth:
1014 if max_depth < depth:
1012 break
1015 break
1013 if not revlog.issnapshot(s):
1016 if not revlog.issnapshot(s):
1014 break
1017 break
1015 parents_snaps[depth].add(s)
1018 parents_snaps[depth].add(s)
1016 # Test them as possible intermediate snapshot base
1019 # Test them as possible intermediate snapshot base
1017 # We test them from highest to lowest level. High level one are more
1020 # We test them from highest to lowest level. High level one are more
1018 # likely to result in small delta
1021 # likely to result in small delta
1019 floor = None
1022 floor = None
1020 for idx, snaps in sorted(parents_snaps.items(), reverse=True):
1023 for idx, snaps in sorted(parents_snaps.items(), reverse=True):
1021 siblings = set()
1024 siblings = set()
1022 for s in snaps:
1025 for s in snaps:
1023 siblings.update(snapshot_cache.snapshots[s])
1026 siblings.update(snapshot_cache.snapshots[s])
1024 # Before considering making a new intermediate snapshot, we check
1027 # Before considering making a new intermediate snapshot, we check
1025 # if an existing snapshot, children of base we consider, would be
1028 # if an existing snapshot, children of base we consider, would be
1026 # suitable.
1029 # suitable.
1027 #
1030 #
1028 # It give a change to reuse a delta chain "unrelated" to the
1031 # It give a change to reuse a delta chain "unrelated" to the
1029 # current revision instead of starting our own. Without such
1032 # current revision instead of starting our own. Without such
1030 # re-use, topological branches would keep reopening new chains.
1033 # re-use, topological branches would keep reopening new chains.
1031 # Creating more and more snapshot as the repository grow.
1034 # Creating more and more snapshot as the repository grow.
1032
1035
1033 if floor is not None:
1036 if floor is not None:
1034 # We only do this for siblings created after the one in our
1037 # We only do this for siblings created after the one in our
1035 # parent's delta chain. Those created before has less chances
1038 # parent's delta chain. Those created before has less chances
1036 # to be valid base since our ancestors had to create a new
1039 # to be valid base since our ancestors had to create a new
1037 # snapshot.
1040 # snapshot.
1038 siblings = [r for r in siblings if floor < r]
1041 siblings = [r for r in siblings if floor < r]
1039 yield tuple(sorted(siblings))
1042 yield tuple(sorted(siblings))
1040 # then test the base from our parent's delta chain.
1043 # then test the base from our parent's delta chain.
1041 yield tuple(sorted(snaps))
1044 yield tuple(sorted(snaps))
1042 floor = min(snaps)
1045 floor = min(snaps)
1043 # No suitable base found in the parent chain, search if any full
1046 # No suitable base found in the parent chain, search if any full
1044 # snapshots emitted since parent's base would be a suitable base for an
1047 # snapshots emitted since parent's base would be a suitable base for an
1045 # intermediate snapshot.
1048 # intermediate snapshot.
1046 #
1049 #
1047 # It give a chance to reuse a delta chain unrelated to the current
1050 # It give a chance to reuse a delta chain unrelated to the current
1048 # revisions instead of starting our own. Without such re-use,
1051 # revisions instead of starting our own. Without such re-use,
1049 # topological branches would keep reopening new full chains. Creating
1052 # topological branches would keep reopening new full chains. Creating
1050 # more and more snapshot as the repository grow.
1053 # more and more snapshot as the repository grow.
1051 full = [r for r in snapshot_cache.snapshots[nullrev] if snapfloor <= r]
1054 full = [r for r in snapshot_cache.snapshots[nullrev] if snapfloor <= r]
1052 yield tuple(sorted(full))
1055 yield tuple(sorted(full))
1053
1056
1054 if not sparse:
1057 if not sparse:
1055 # other approach failed try against prev to hopefully save us a
1058 # other approach failed try against prev to hopefully save us a
1056 # fulltext.
1059 # fulltext.
1057 yield (prev,)
1060 yield (prev,)
1058
1061
1059
1062
class SnapshotCache:
    """Track known snapshot revisions, keyed by their delta parent.

    ``snapshots`` maps a delta-parent revision (``nullrev`` for full
    snapshots) to the set of snapshot revisions built on top of it.  The
    cache remembers the contiguous ``[_start_rev, _end_rev]`` range it has
    already scanned so that repeated ``update`` calls only walk revisions
    not covered yet.
    """

    __slots__ = ('snapshots', '_start_rev', '_end_rev')

    def __init__(self):
        self.snapshots = collections.defaultdict(set)
        # inclusive bounds of the already-scanned range; both stay None
        # until the first update() call actually scans something.
        self._start_rev = None
        self._end_rev = None

    def update(self, revlog, start_rev=0):
        """find snapshots from start_rev to tip"""
        end_rev = len(revlog) - 1
        if start_rev > end_rev:
            return  # range is empty

        if self._start_rev is None:
            assert self._end_rev is None
            self._update(revlog, start_rev, end_rev)
        elif not (self._start_rev <= start_rev and end_rev <= self._end_rev):
            # only scan the parts of the requested range we do not cover yet
            if start_rev < self._start_rev:
                self._update(revlog, start_rev, self._start_rev - 1)
            if self._end_rev < end_rev:
                self._update(revlog, self._end_rev + 1, end_rev)

        if self._start_rev is None:
            assert self._end_rev is None
            self._end_rev = end_rev
            self._start_rev = start_rev
        else:
            self._start_rev = min(self._start_rev, start_rev)
            self._end_rev = max(self._end_rev, end_rev)
        assert self._start_rev <= self._end_rev, (
            self._start_rev,
            self._end_rev,
        )

    def _update(self, revlog, start_rev, end_rev):
        """internal method that actually do update content"""
        # the requested range must lie entirely outside the covered one;
        # update() is responsible for splitting requests accordingly
        assert self._start_rev is None or (
            start_rev < self._start_rev or start_rev > self._end_rev
        ), (self._start_rev, self._end_rev, start_rev, end_rev)
        assert self._start_rev is None or (
            end_rev < self._start_rev or end_rev > self._end_rev
        ), (self._start_rev, self._end_rev, start_rev, end_rev)
        cache = self.snapshots
        if hasattr(revlog.index, 'findsnapshots'):
            # fast path: let the native index enumerate snapshots for us
            revlog.index.findsnapshots(cache, start_rev, end_rev)
        else:
            deltaparent = revlog.deltaparent
            issnapshot = revlog.issnapshot
            for rev in revlog.revs(start_rev, end_rev):
                if issnapshot(rev):
                    cache[deltaparent(rev)].add(rev)
1113
1116
1114
1117
1115 class deltacomputer:
1118 class deltacomputer:
1116 """object capable of computing delta and finding delta for multiple revision
1119 """object capable of computing delta and finding delta for multiple revision
1117
1120
1118 This object is meant to compute and find multiple delta applied to the same
1121 This object is meant to compute and find multiple delta applied to the same
1119 revlog.
1122 revlog.
1120 """
1123 """
1121
1124
1122 def __init__(
1125 def __init__(
1123 self,
1126 self,
1124 revlog,
1127 revlog,
1125 write_debug=None,
1128 write_debug=None,
1126 debug_search=False,
1129 debug_search=False,
1127 debug_info=None,
1130 debug_info=None,
1128 ):
1131 ):
1129 self.revlog = revlog
1132 self.revlog = revlog
1130 self._write_debug = write_debug
1133 self._write_debug = write_debug
1131 if write_debug is None:
1134 if write_debug is None:
1132 self._debug_search = False
1135 self._debug_search = False
1133 else:
1136 else:
1134 self._debug_search = debug_search
1137 self._debug_search = debug_search
1135 self._debug_info = debug_info
1138 self._debug_info = debug_info
1136 self._snapshot_cache = SnapshotCache()
1139 self._snapshot_cache = SnapshotCache()
1137
1140
1138 @property
1141 @property
1139 def _gather_debug(self):
1142 def _gather_debug(self):
1140 return self._write_debug is not None or self._debug_info is not None
1143 return self._write_debug is not None or self._debug_info is not None
1141
1144
1142 def buildtext(self, revinfo):
1145 def buildtext(self, revinfo):
1143 """Builds a fulltext version of a revision
1146 """Builds a fulltext version of a revision
1144
1147
1145 revinfo: revisioninfo instance that contains all needed info
1148 revinfo: revisioninfo instance that contains all needed info
1146 """
1149 """
1147 btext = revinfo.btext
1150 btext = revinfo.btext
1148 if btext[0] is not None:
1151 if btext[0] is not None:
1149 return btext[0]
1152 return btext[0]
1150
1153
1151 revlog = self.revlog
1154 revlog = self.revlog
1152 cachedelta = revinfo.cachedelta
1155 cachedelta = revinfo.cachedelta
1153 baserev = cachedelta[0]
1156 baserev = cachedelta[0]
1154 delta = cachedelta[1]
1157 delta = cachedelta[1]
1155
1158
1156 fulltext = btext[0] = _textfromdelta(
1159 fulltext = btext[0] = _textfromdelta(
1157 revlog,
1160 revlog,
1158 baserev,
1161 baserev,
1159 delta,
1162 delta,
1160 revinfo.p1,
1163 revinfo.p1,
1161 revinfo.p2,
1164 revinfo.p2,
1162 revinfo.flags,
1165 revinfo.flags,
1163 revinfo.node,
1166 revinfo.node,
1164 )
1167 )
1165 return fulltext
1168 return fulltext
1166
1169
1167 def _builddeltadiff(self, base, revinfo):
1170 def _builddeltadiff(self, base, revinfo):
1168 revlog = self.revlog
1171 revlog = self.revlog
1169 t = self.buildtext(revinfo)
1172 t = self.buildtext(revinfo)
1170 if revlog.iscensored(base):
1173 if revlog.iscensored(base):
1171 # deltas based on a censored revision must replace the
1174 # deltas based on a censored revision must replace the
1172 # full content in one patch, so delta works everywhere
1175 # full content in one patch, so delta works everywhere
1173 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
1176 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
1174 delta = header + t
1177 delta = header + t
1175 else:
1178 else:
1176 ptext = revlog.rawdata(base)
1179 ptext = revlog.rawdata(base)
1177 delta = mdiff.textdiff(ptext, t)
1180 delta = mdiff.textdiff(ptext, t)
1178
1181
1179 return delta
1182 return delta
1180
1183
1181 def _builddeltainfo(self, revinfo, base, target_rev=None):
1184 def _builddeltainfo(self, revinfo, base, target_rev=None):
1182 # can we use the cached delta?
1185 # can we use the cached delta?
1183 revlog = self.revlog
1186 revlog = self.revlog
1184 chainbase = revlog.chainbase(base)
1187 chainbase = revlog.chainbase(base)
1185 if revlog.delta_config.general_delta:
1188 if revlog.delta_config.general_delta:
1186 deltabase = base
1189 deltabase = base
1187 else:
1190 else:
1188 if target_rev is not None and base != target_rev - 1:
1191 if target_rev is not None and base != target_rev - 1:
1189 msg = (
1192 msg = (
1190 b'general delta cannot use delta for something else '
1193 b'general delta cannot use delta for something else '
1191 b'than `prev`: %d<-%d'
1194 b'than `prev`: %d<-%d'
1192 )
1195 )
1193 msg %= (base, target_rev)
1196 msg %= (base, target_rev)
1194 raise error.ProgrammingError(msg)
1197 raise error.ProgrammingError(msg)
1195 deltabase = chainbase
1198 deltabase = chainbase
1196 snapshotdepth = None
1199 snapshotdepth = None
1197 if revlog.delta_config.sparse_revlog and deltabase == nullrev:
1200 if revlog.delta_config.sparse_revlog and deltabase == nullrev:
1198 snapshotdepth = 0
1201 snapshotdepth = 0
1199 elif revlog.delta_config.sparse_revlog and revlog.issnapshot(deltabase):
1202 elif revlog.delta_config.sparse_revlog and revlog.issnapshot(deltabase):
1200 # A delta chain should always be one full snapshot,
1203 # A delta chain should always be one full snapshot,
1201 # zero or more semi-snapshots, and zero or more deltas
1204 # zero or more semi-snapshots, and zero or more deltas
1202 p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
1205 p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
1203 if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
1206 if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
1204 snapshotdepth = len(revlog._deltachain(deltabase)[0])
1207 snapshotdepth = len(revlog._deltachain(deltabase)[0])
1205 delta = None
1208 delta = None
1206 if revinfo.cachedelta:
1209 if revinfo.cachedelta:
1207 cachebase = revinfo.cachedelta[0]
1210 cachebase = revinfo.cachedelta[0]
1208 # check if the diff still apply
1211 # check if the diff still apply
1209 currentbase = cachebase
1212 currentbase = cachebase
1210 while (
1213 while (
1211 currentbase != nullrev
1214 currentbase != nullrev
1212 and currentbase != base
1215 and currentbase != base
1213 and self.revlog.length(currentbase) == 0
1216 and self.revlog.length(currentbase) == 0
1214 ):
1217 ):
1215 currentbase = self.revlog.deltaparent(currentbase)
1218 currentbase = self.revlog.deltaparent(currentbase)
1216 if self.revlog.delta_config.lazy_delta and currentbase == base:
1219 if self.revlog.delta_config.lazy_delta and currentbase == base:
1217 delta = revinfo.cachedelta[1]
1220 delta = revinfo.cachedelta[1]
1218 if delta is None:
1221 if delta is None:
1219 delta = self._builddeltadiff(base, revinfo)
1222 delta = self._builddeltadiff(base, revinfo)
1220 if self._debug_search:
1223 if self._debug_search:
1221 msg = b"DBG-DELTAS-SEARCH: uncompressed-delta-size=%d\n"
1224 msg = b"DBG-DELTAS-SEARCH: uncompressed-delta-size=%d\n"
1222 msg %= len(delta)
1225 msg %= len(delta)
1223 self._write_debug(msg)
1226 self._write_debug(msg)
1224 # snapshotdept need to be neither None nor 0 level snapshot
1227 # snapshotdept need to be neither None nor 0 level snapshot
1225 if revlog.delta_config.upper_bound_comp is not None and snapshotdepth:
1228 if revlog.delta_config.upper_bound_comp is not None and snapshotdepth:
1226 lowestrealisticdeltalen = (
1229 lowestrealisticdeltalen = (
1227 len(delta) // revlog.delta_config.upper_bound_comp
1230 len(delta) // revlog.delta_config.upper_bound_comp
1228 )
1231 )
1229 snapshotlimit = revinfo.textlen >> snapshotdepth
1232 snapshotlimit = revinfo.textlen >> snapshotdepth
1230 if self._debug_search:
1233 if self._debug_search:
1231 msg = b"DBG-DELTAS-SEARCH: projected-lower-size=%d\n"
1234 msg = b"DBG-DELTAS-SEARCH: projected-lower-size=%d\n"
1232 msg %= lowestrealisticdeltalen
1235 msg %= lowestrealisticdeltalen
1233 self._write_debug(msg)
1236 self._write_debug(msg)
1234 if snapshotlimit < lowestrealisticdeltalen:
1237 if snapshotlimit < lowestrealisticdeltalen:
1235 if self._debug_search:
1238 if self._debug_search:
1236 msg = b"DBG-DELTAS-SEARCH: DISCARDED (snapshot limit)\n"
1239 msg = b"DBG-DELTAS-SEARCH: DISCARDED (snapshot limit)\n"
1237 self._write_debug(msg)
1240 self._write_debug(msg)
1238 return None
1241 return None
1239 if revlog.length(base) < lowestrealisticdeltalen:
1242 if revlog.length(base) < lowestrealisticdeltalen:
1240 if self._debug_search:
1243 if self._debug_search:
1241 msg = b"DBG-DELTAS-SEARCH: DISCARDED (prev size)\n"
1244 msg = b"DBG-DELTAS-SEARCH: DISCARDED (prev size)\n"
1242 self._write_debug(msg)
1245 self._write_debug(msg)
1243 return None
1246 return None
1244 header, data = revlog._inner.compress(delta)
1247 header, data = revlog._inner.compress(delta)
1245 deltalen = len(header) + len(data)
1248 deltalen = len(header) + len(data)
1246 offset = revlog.end(len(revlog) - 1)
1249 offset = revlog.end(len(revlog) - 1)
1247 dist = deltalen + offset - revlog.start(chainbase)
1250 dist = deltalen + offset - revlog.start(chainbase)
1248 chainlen, compresseddeltalen = revlog._chaininfo(base)
1251 chainlen, compresseddeltalen = revlog._chaininfo(base)
1249 chainlen += 1
1252 chainlen += 1
1250 compresseddeltalen += deltalen
1253 compresseddeltalen += deltalen
1251
1254
1252 return _deltainfo(
1255 return _deltainfo(
1253 dist,
1256 dist,
1254 deltalen,
1257 deltalen,
1255 (header, data),
1258 (header, data),
1256 deltabase,
1259 deltabase,
1257 chainbase,
1260 chainbase,
1258 chainlen,
1261 chainlen,
1259 compresseddeltalen,
1262 compresseddeltalen,
1260 snapshotdepth,
1263 snapshotdepth,
1261 )
1264 )
1262
1265
1263 def _fullsnapshotinfo(self, revinfo, curr):
1266 def _fullsnapshotinfo(self, revinfo, curr):
1264 rawtext = self.buildtext(revinfo)
1267 rawtext = self.buildtext(revinfo)
1265 data = self.revlog._inner.compress(rawtext)
1268 data = self.revlog._inner.compress(rawtext)
1266 compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
1269 compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
1267 deltabase = chainbase = curr
1270 deltabase = chainbase = curr
1268 snapshotdepth = 0
1271 snapshotdepth = 0
1269 chainlen = 1
1272 chainlen = 1
1270
1273
1271 return _deltainfo(
1274 return _deltainfo(
1272 dist,
1275 dist,
1273 deltalen,
1276 deltalen,
1274 data,
1277 data,
1275 deltabase,
1278 deltabase,
1276 chainbase,
1279 chainbase,
1277 chainlen,
1280 chainlen,
1278 compresseddeltalen,
1281 compresseddeltalen,
1279 snapshotdepth,
1282 snapshotdepth,
1280 )
1283 )
1281
1284
1282 def finddeltainfo(self, revinfo, excluded_bases=None, target_rev=None):
1285 def finddeltainfo(self, revinfo, excluded_bases=None, target_rev=None):
1283 """Find an acceptable delta against a candidate revision
1286 """Find an acceptable delta against a candidate revision
1284
1287
1285 revinfo: information about the revision (instance of _revisioninfo)
1288 revinfo: information about the revision (instance of _revisioninfo)
1286
1289
1287 Returns the first acceptable candidate revision, as ordered by
1290 Returns the first acceptable candidate revision, as ordered by
1288 _candidategroups
1291 _candidategroups
1289
1292
1290 If no suitable deltabase is found, we return delta info for a full
1293 If no suitable deltabase is found, we return delta info for a full
1291 snapshot.
1294 snapshot.
1292
1295
1293 `excluded_bases` is an optional set of revision that cannot be used as
1296 `excluded_bases` is an optional set of revision that cannot be used as
1294 a delta base. Use this to recompute delta suitable in censor or strip
1297 a delta base. Use this to recompute delta suitable in censor or strip
1295 context.
1298 context.
1296 """
1299 """
1297 if target_rev is None:
1300 if target_rev is None:
1298 target_rev = len(self.revlog)
1301 target_rev = len(self.revlog)
1299
1302
1300 gather_debug = self._gather_debug
1303 gather_debug = self._gather_debug
1301 cachedelta = revinfo.cachedelta
1304 cachedelta = revinfo.cachedelta
1302 revlog = self.revlog
1305 revlog = self.revlog
1303 p1r = p2r = None
1306 p1r = p2r = None
1304
1307
1305 if excluded_bases is None:
1308 if excluded_bases is None:
1306 excluded_bases = set()
1309 excluded_bases = set()
1307
1310
1308 if gather_debug:
1311 if gather_debug:
1309 start = util.timer()
1312 start = util.timer()
1310 dbg = self._one_dbg_data()
1313 dbg = self._one_dbg_data()
1311 dbg['revision'] = target_rev
1314 dbg['revision'] = target_rev
1312 target_revlog = b"UNKNOWN"
1315 target_revlog = b"UNKNOWN"
1313 target_type = self.revlog.target[0]
1316 target_type = self.revlog.target[0]
1314 target_key = self.revlog.target[1]
1317 target_key = self.revlog.target[1]
1315 if target_type == KIND_CHANGELOG:
1318 if target_type == KIND_CHANGELOG:
1316 target_revlog = b'CHANGELOG:'
1319 target_revlog = b'CHANGELOG:'
1317 elif target_type == KIND_MANIFESTLOG:
1320 elif target_type == KIND_MANIFESTLOG:
1318 target_revlog = b'MANIFESTLOG:'
1321 target_revlog = b'MANIFESTLOG:'
1319 if target_key:
1322 if target_key:
1320 target_revlog += b'%s:' % target_key
1323 target_revlog += b'%s:' % target_key
1321 elif target_type == KIND_FILELOG:
1324 elif target_type == KIND_FILELOG:
1322 target_revlog = b'FILELOG:'
1325 target_revlog = b'FILELOG:'
1323 if target_key:
1326 if target_key:
1324 target_revlog += b'%s:' % target_key
1327 target_revlog += b'%s:' % target_key
1325 dbg['target-revlog'] = target_revlog
1328 dbg['target-revlog'] = target_revlog
1326 p1r = revlog.rev(revinfo.p1)
1329 p1r = revlog.rev(revinfo.p1)
1327 p2r = revlog.rev(revinfo.p2)
1330 p2r = revlog.rev(revinfo.p2)
1328 if p1r != nullrev:
1331 if p1r != nullrev:
1329 p1_chain_len = revlog._chaininfo(p1r)[0]
1332 p1_chain_len = revlog._chaininfo(p1r)[0]
1330 else:
1333 else:
1331 p1_chain_len = -1
1334 p1_chain_len = -1
1332 if p2r != nullrev:
1335 if p2r != nullrev:
1333 p2_chain_len = revlog._chaininfo(p2r)[0]
1336 p2_chain_len = revlog._chaininfo(p2r)[0]
1334 else:
1337 else:
1335 p2_chain_len = -1
1338 p2_chain_len = -1
1336 dbg['p1-chain-len'] = p1_chain_len
1339 dbg['p1-chain-len'] = p1_chain_len
1337 dbg['p2-chain-len'] = p2_chain_len
1340 dbg['p2-chain-len'] = p2_chain_len
1338
1341
1339 # 1) if the revision is empty, no amount of delta can beat it
1342 # 1) if the revision is empty, no amount of delta can beat it
1340 #
1343 #
1341 # 2) no delta for flag processor revision (see "candelta" for why)
1344 # 2) no delta for flag processor revision (see "candelta" for why)
1342 # not calling candelta since only one revision needs test, also to
1345 # not calling candelta since only one revision needs test, also to
1343 # avoid overhead fetching flags again.
1346 # avoid overhead fetching flags again.
1344 if not revinfo.textlen or revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
1347 if not revinfo.textlen or revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
1345 deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
1348 deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
1346 if gather_debug:
1349 if gather_debug:
1347 end = util.timer()
1350 end = util.timer()
1348 dbg['duration'] = end - start
1351 dbg['duration'] = end - start
1349 dbg[
1352 dbg[
1350 'delta-base'
1353 'delta-base'
1351 ] = deltainfo.base # pytype: disable=attribute-error
1354 ] = deltainfo.base # pytype: disable=attribute-error
1352 dbg['search_round_count'] = 0
1355 dbg['search_round_count'] = 0
1353 dbg['using-cached-base'] = False
1356 dbg['using-cached-base'] = False
1354 dbg['delta_try_count'] = 0
1357 dbg['delta_try_count'] = 0
1355 dbg['type'] = b"full"
1358 dbg['type'] = b"full"
1356 dbg['snapshot-depth'] = 0
1359 dbg['snapshot-depth'] = 0
1357 self._dbg_process_data(dbg)
1360 self._dbg_process_data(dbg)
1358 return deltainfo
1361 return deltainfo
1359
1362
1360 deltainfo = None
1363 deltainfo = None
1361
1364
1362 # If this source delta are to be forcibly reuse, let us comply early.
1365 # If this source delta are to be forcibly reuse, let us comply early.
1363 if (
1366 if (
1364 revlog.delta_config.general_delta
1367 revlog.delta_config.general_delta
1365 and revinfo.cachedelta is not None
1368 and revinfo.cachedelta is not None
1366 and revinfo.cachedelta[2] == DELTA_BASE_REUSE_FORCE
1369 and revinfo.cachedelta[2] == DELTA_BASE_REUSE_FORCE
1367 ):
1370 ):
1368 base = revinfo.cachedelta[0]
1371 base = revinfo.cachedelta[0]
1369 if base == nullrev:
1372 if base == nullrev:
1370 dbg_type = b"full"
1373 dbg_type = b"full"
1371 deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
1374 deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
1372 if gather_debug:
1375 if gather_debug:
1373 snapshotdepth = 0
1376 snapshotdepth = 0
1374 elif base not in excluded_bases:
1377 elif base not in excluded_bases:
1375 delta = revinfo.cachedelta[1]
1378 delta = revinfo.cachedelta[1]
1376 header, data = revlog.compress(delta)
1379 header, data = revlog.compress(delta)
1377 deltalen = len(header) + len(data)
1380 deltalen = len(header) + len(data)
1378 if gather_debug:
1381 if gather_debug:
1379 offset = revlog.end(len(revlog) - 1)
1382 offset = revlog.end(len(revlog) - 1)
1380 chainbase = revlog.chainbase(base)
1383 chainbase = revlog.chainbase(base)
1381 distance = deltalen + offset - revlog.start(chainbase)
1384 distance = deltalen + offset - revlog.start(chainbase)
1382 chainlen, compresseddeltalen = revlog._chaininfo(base)
1385 chainlen, compresseddeltalen = revlog._chaininfo(base)
1383 chainlen += 1
1386 chainlen += 1
1384 compresseddeltalen += deltalen
1387 compresseddeltalen += deltalen
1385 if base == p1r or base == p2r:
1388 if base == p1r or base == p2r:
1386 dbg_type = b"delta"
1389 dbg_type = b"delta"
1387 snapshotdepth = None
1390 snapshotdepth = None
1388 elif not revlog.issnapshot(base):
1391 elif not revlog.issnapshot(base):
1389 snapshotdepth = None
1392 snapshotdepth = None
1390 else:
1393 else:
1391 dbg_type = b"snapshot"
1394 dbg_type = b"snapshot"
1392 snapshotdepth = revlog.snapshotdepth(base) + 1
1395 snapshotdepth = revlog.snapshotdepth(base) + 1
1393 else:
1396 else:
1394 distance = None
1397 distance = None
1395 chainbase = None
1398 chainbase = None
1396 chainlen = None
1399 chainlen = None
1397 compresseddeltalen = None
1400 compresseddeltalen = None
1398 snapshotdepth = None
1401 snapshotdepth = None
1399 deltainfo = _deltainfo(
1402 deltainfo = _deltainfo(
1400 distance=distance,
1403 distance=distance,
1401 deltalen=deltalen,
1404 deltalen=deltalen,
1402 data=(header, data),
1405 data=(header, data),
1403 base=base,
1406 base=base,
1404 chainbase=chainbase,
1407 chainbase=chainbase,
1405 chainlen=chainlen,
1408 chainlen=chainlen,
1406 compresseddeltalen=compresseddeltalen,
1409 compresseddeltalen=compresseddeltalen,
1407 snapshotdepth=snapshotdepth,
1410 snapshotdepth=snapshotdepth,
1408 )
1411 )
1409
1412
1410 if deltainfo is not None:
1413 if deltainfo is not None:
1411 if gather_debug:
1414 if gather_debug:
1412 end = util.timer()
1415 end = util.timer()
1413 dbg['duration'] = end - start
1416 dbg['duration'] = end - start
1414 dbg[
1417 dbg[
1415 'delta-base'
1418 'delta-base'
1416 ] = deltainfo.base # pytype: disable=attribute-error
1419 ] = deltainfo.base # pytype: disable=attribute-error
1417 dbg['search_round_count'] = 0
1420 dbg['search_round_count'] = 0
1418 dbg['using-cached-base'] = True
1421 dbg['using-cached-base'] = True
1419 dbg['delta_try_count'] = 0
1422 dbg['delta_try_count'] = 0
1420 dbg['type'] = b"full"
1423 dbg['type'] = b"full"
1421 if snapshotdepth is None:
1424 if snapshotdepth is None:
1422 dbg['snapshot-depth'] = 0
1425 dbg['snapshot-depth'] = 0
1423 else:
1426 else:
1424 dbg['snapshot-depth'] = snapshotdepth
1427 dbg['snapshot-depth'] = snapshotdepth
1425 self._dbg_process_data(dbg)
1428 self._dbg_process_data(dbg)
1426 return deltainfo
1429 return deltainfo
1427
1430
1428 # count the number of different delta we tried (for debug purpose)
1431 # count the number of different delta we tried (for debug purpose)
1429 dbg_try_count = 0
1432 dbg_try_count = 0
1430 # count the number of "search round" we did. (for debug purpose)
1433 # count the number of "search round" we did. (for debug purpose)
1431 dbg_try_rounds = 0
1434 dbg_try_rounds = 0
1432 dbg_type = b'unknown'
1435 dbg_type = b'unknown'
1433
1436
1434 if p1r is None:
1437 if p1r is None:
1435 p1r = revlog.rev(revinfo.p1)
1438 p1r = revlog.rev(revinfo.p1)
1436 p2r = revlog.rev(revinfo.p2)
1439 p2r = revlog.rev(revinfo.p2)
1437
1440
1438 if self._debug_search:
1441 if self._debug_search:
1439 msg = b"DBG-DELTAS-SEARCH: SEARCH rev=%d\n"
1442 msg = b"DBG-DELTAS-SEARCH: SEARCH rev=%d\n"
1440 msg %= target_rev
1443 msg %= target_rev
1441 self._write_debug(msg)
1444 self._write_debug(msg)
1442
1445
1443 search = _DeltaSearch(
1446 search = _DeltaSearch(
1444 self.revlog,
1447 self.revlog,
1445 revinfo.textlen,
1448 revinfo.textlen,
1446 p1r,
1449 p1r,
1447 p2r,
1450 p2r,
1448 cachedelta,
1451 cachedelta,
1449 excluded_bases,
1452 excluded_bases,
1450 target_rev,
1453 target_rev,
1451 snapshot_cache=self._snapshot_cache,
1454 snapshot_cache=self._snapshot_cache,
1452 )
1455 )
1453
1456
1454 groups = search.candidate_groups()
1457 groups = search.candidate_groups()
1455 candidaterevs = next(groups)
1458 candidaterevs = next(groups)
1456 while candidaterevs is not None:
1459 while candidaterevs is not None:
1457 dbg_try_rounds += 1
1460 dbg_try_rounds += 1
1458 if self._debug_search:
1461 if self._debug_search:
1459 prev = None
1462 prev = None
1460 if deltainfo is not None:
1463 if deltainfo is not None:
1461 prev = deltainfo.base
1464 prev = deltainfo.base
1462
1465
1463 if (
1466 if (
1464 cachedelta is not None
1467 cachedelta is not None
1465 and len(candidaterevs) == 1
1468 and len(candidaterevs) == 1
1466 and cachedelta[0] in candidaterevs
1469 and cachedelta[0] in candidaterevs
1467 ):
1470 ):
1468 round_type = b"cached-delta"
1471 round_type = b"cached-delta"
1469 elif p1r in candidaterevs or p2r in candidaterevs:
1472 elif p1r in candidaterevs or p2r in candidaterevs:
1470 round_type = b"parents"
1473 round_type = b"parents"
1471 elif prev is not None and all(c < prev for c in candidaterevs):
1474 elif prev is not None and all(c < prev for c in candidaterevs):
1472 round_type = b"refine-down"
1475 round_type = b"refine-down"
1473 elif prev is not None and all(c > prev for c in candidaterevs):
1476 elif prev is not None and all(c > prev for c in candidaterevs):
1474 round_type = b"refine-up"
1477 round_type = b"refine-up"
1475 else:
1478 else:
1476 round_type = b"search-down"
1479 round_type = b"search-down"
1477 msg = b"DBG-DELTAS-SEARCH: ROUND #%d - %d candidates - %s\n"
1480 msg = b"DBG-DELTAS-SEARCH: ROUND #%d - %d candidates - %s\n"
1478 msg %= (dbg_try_rounds, len(candidaterevs), round_type)
1481 msg %= (dbg_try_rounds, len(candidaterevs), round_type)
1479 self._write_debug(msg)
1482 self._write_debug(msg)
1480 nominateddeltas = []
1483 nominateddeltas = []
1481 if deltainfo is not None:
1484 if deltainfo is not None:
1482 if self._debug_search:
1485 if self._debug_search:
1483 msg = (
1486 msg = (
1484 b"DBG-DELTAS-SEARCH: CONTENDER: rev=%d - length=%d\n"
1487 b"DBG-DELTAS-SEARCH: CONTENDER: rev=%d - length=%d\n"
1485 )
1488 )
1486 msg %= (deltainfo.base, deltainfo.deltalen)
1489 msg %= (deltainfo.base, deltainfo.deltalen)
1487 self._write_debug(msg)
1490 self._write_debug(msg)
1488 # if we already found a good delta,
1491 # if we already found a good delta,
1489 # challenge it against refined candidates
1492 # challenge it against refined candidates
1490 nominateddeltas.append(deltainfo)
1493 nominateddeltas.append(deltainfo)
1491 for candidaterev in candidaterevs:
1494 for candidaterev in candidaterevs:
1492 if self._debug_search:
1495 if self._debug_search:
1493 msg = b"DBG-DELTAS-SEARCH: CANDIDATE: rev=%d\n"
1496 msg = b"DBG-DELTAS-SEARCH: CANDIDATE: rev=%d\n"
1494 msg %= candidaterev
1497 msg %= candidaterev
1495 self._write_debug(msg)
1498 self._write_debug(msg)
1496 candidate_type = None
1499 candidate_type = None
1497 if candidaterev == p1r:
1500 if candidaterev == p1r:
1498 candidate_type = b"p1"
1501 candidate_type = b"p1"
1499 elif candidaterev == p2r:
1502 elif candidaterev == p2r:
1500 candidate_type = b"p2"
1503 candidate_type = b"p2"
1501 elif self.revlog.issnapshot(candidaterev):
1504 elif self.revlog.issnapshot(candidaterev):
1502 candidate_type = b"snapshot-%d"
1505 candidate_type = b"snapshot-%d"
1503 candidate_type %= self.revlog.snapshotdepth(
1506 candidate_type %= self.revlog.snapshotdepth(
1504 candidaterev
1507 candidaterev
1505 )
1508 )
1506
1509
1507 if candidate_type is not None:
1510 if candidate_type is not None:
1508 msg = b"DBG-DELTAS-SEARCH: type=%s\n"
1511 msg = b"DBG-DELTAS-SEARCH: type=%s\n"
1509 msg %= candidate_type
1512 msg %= candidate_type
1510 self._write_debug(msg)
1513 self._write_debug(msg)
1511 msg = b"DBG-DELTAS-SEARCH: size=%d\n"
1514 msg = b"DBG-DELTAS-SEARCH: size=%d\n"
1512 msg %= self.revlog.length(candidaterev)
1515 msg %= self.revlog.length(candidaterev)
1513 self._write_debug(msg)
1516 self._write_debug(msg)
1514 msg = b"DBG-DELTAS-SEARCH: base=%d\n"
1517 msg = b"DBG-DELTAS-SEARCH: base=%d\n"
1515 msg %= self.revlog.deltaparent(candidaterev)
1518 msg %= self.revlog.deltaparent(candidaterev)
1516 self._write_debug(msg)
1519 self._write_debug(msg)
1517
1520
1518 dbg_try_count += 1
1521 dbg_try_count += 1
1519
1522
1520 if self._debug_search:
1523 if self._debug_search:
1521 delta_start = util.timer()
1524 delta_start = util.timer()
1522 candidatedelta = self._builddeltainfo(
1525 candidatedelta = self._builddeltainfo(
1523 revinfo,
1526 revinfo,
1524 candidaterev,
1527 candidaterev,
1525 target_rev=target_rev,
1528 target_rev=target_rev,
1526 )
1529 )
1527 if self._debug_search:
1530 if self._debug_search:
1528 delta_end = util.timer()
1531 delta_end = util.timer()
1529 msg = b"DBG-DELTAS-SEARCH: delta-search-time=%f\n"
1532 msg = b"DBG-DELTAS-SEARCH: delta-search-time=%f\n"
1530 msg %= delta_end - delta_start
1533 msg %= delta_end - delta_start
1531 self._write_debug(msg)
1534 self._write_debug(msg)
1532 if candidatedelta is not None:
1535 if candidatedelta is not None:
1533 if is_good_delta_info(self.revlog, candidatedelta, revinfo):
1536 if is_good_delta_info(self.revlog, candidatedelta, revinfo):
1534 if self._debug_search:
1537 if self._debug_search:
1535 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (GOOD)\n"
1538 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (GOOD)\n"
1536 msg %= candidatedelta.deltalen
1539 msg %= candidatedelta.deltalen
1537 self._write_debug(msg)
1540 self._write_debug(msg)
1538 nominateddeltas.append(candidatedelta)
1541 nominateddeltas.append(candidatedelta)
1539 elif self._debug_search:
1542 elif self._debug_search:
1540 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (BAD)\n"
1543 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (BAD)\n"
1541 msg %= candidatedelta.deltalen
1544 msg %= candidatedelta.deltalen
1542 self._write_debug(msg)
1545 self._write_debug(msg)
1543 elif self._debug_search:
1546 elif self._debug_search:
1544 msg = b"DBG-DELTAS-SEARCH: NO-DELTA\n"
1547 msg = b"DBG-DELTAS-SEARCH: NO-DELTA\n"
1545 self._write_debug(msg)
1548 self._write_debug(msg)
1546 if nominateddeltas:
1549 if nominateddeltas:
1547 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
1550 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
1548 if deltainfo is not None:
1551 if deltainfo is not None:
1549 candidaterevs = groups.send(deltainfo.base)
1552 candidaterevs = groups.send(deltainfo.base)
1550 else:
1553 else:
1551 candidaterevs = next(groups)
1554 candidaterevs = next(groups)
1552
1555
1553 if deltainfo is None:
1556 if deltainfo is None:
1554 dbg_type = b"full"
1557 dbg_type = b"full"
1555 deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
1558 deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
1556 elif deltainfo.snapshotdepth: # pytype: disable=attribute-error
1559 elif deltainfo.snapshotdepth: # pytype: disable=attribute-error
1557 dbg_type = b"snapshot"
1560 dbg_type = b"snapshot"
1558 else:
1561 else:
1559 dbg_type = b"delta"
1562 dbg_type = b"delta"
1560
1563
1561 if gather_debug:
1564 if gather_debug:
1562 end = util.timer()
1565 end = util.timer()
1563 if dbg_type == b'full':
1566 if dbg_type == b'full':
1564 used_cached = (
1567 used_cached = (
1565 cachedelta is not None
1568 cachedelta is not None
1566 and dbg_try_rounds == 0
1569 and dbg_try_rounds == 0
1567 and dbg_try_count == 0
1570 and dbg_try_count == 0
1568 and cachedelta[0] == nullrev
1571 and cachedelta[0] == nullrev
1569 )
1572 )
1570 else:
1573 else:
1571 used_cached = (
1574 used_cached = (
1572 cachedelta is not None
1575 cachedelta is not None
1573 and dbg_try_rounds == 1
1576 and dbg_try_rounds == 1
1574 and dbg_try_count == 1
1577 and dbg_try_count == 1
1575 and deltainfo.base == cachedelta[0]
1578 and deltainfo.base == cachedelta[0]
1576 )
1579 )
1577 dbg['duration'] = end - start
1580 dbg['duration'] = end - start
1578 dbg[
1581 dbg[
1579 'delta-base'
1582 'delta-base'
1580 ] = deltainfo.base # pytype: disable=attribute-error
1583 ] = deltainfo.base # pytype: disable=attribute-error
1581 dbg['search_round_count'] = dbg_try_rounds
1584 dbg['search_round_count'] = dbg_try_rounds
1582 dbg['using-cached-base'] = used_cached
1585 dbg['using-cached-base'] = used_cached
1583 dbg['delta_try_count'] = dbg_try_count
1586 dbg['delta_try_count'] = dbg_try_count
1584 dbg['type'] = dbg_type
1587 dbg['type'] = dbg_type
1585 if (
1588 if (
1586 deltainfo.snapshotdepth # pytype: disable=attribute-error
1589 deltainfo.snapshotdepth # pytype: disable=attribute-error
1587 is not None
1590 is not None
1588 ):
1591 ):
1589 dbg[
1592 dbg[
1590 'snapshot-depth'
1593 'snapshot-depth'
1591 ] = deltainfo.snapshotdepth # pytype: disable=attribute-error
1594 ] = deltainfo.snapshotdepth # pytype: disable=attribute-error
1592 else:
1595 else:
1593 dbg['snapshot-depth'] = 0
1596 dbg['snapshot-depth'] = 0
1594 self._dbg_process_data(dbg)
1597 self._dbg_process_data(dbg)
1595 return deltainfo
1598 return deltainfo
1596
1599
1597 def _one_dbg_data(self):
1600 def _one_dbg_data(self):
1598 return {
1601 return {
1599 'duration': None,
1602 'duration': None,
1600 'revision': None,
1603 'revision': None,
1601 'delta-base': None,
1604 'delta-base': None,
1602 'search_round_count': None,
1605 'search_round_count': None,
1603 'using-cached-base': None,
1606 'using-cached-base': None,
1604 'delta_try_count': None,
1607 'delta_try_count': None,
1605 'type': None,
1608 'type': None,
1606 'p1-chain-len': None,
1609 'p1-chain-len': None,
1607 'p2-chain-len': None,
1610 'p2-chain-len': None,
1608 'snapshot-depth': None,
1611 'snapshot-depth': None,
1609 'target-revlog': None,
1612 'target-revlog': None,
1610 }
1613 }
1611
1614
1612 def _dbg_process_data(self, dbg):
1615 def _dbg_process_data(self, dbg):
1613 if self._debug_info is not None:
1616 if self._debug_info is not None:
1614 self._debug_info.append(dbg)
1617 self._debug_info.append(dbg)
1615
1618
1616 if self._write_debug is not None:
1619 if self._write_debug is not None:
1617 msg = (
1620 msg = (
1618 b"DBG-DELTAS:"
1621 b"DBG-DELTAS:"
1619 b" %-12s"
1622 b" %-12s"
1620 b" rev=%d:"
1623 b" rev=%d:"
1621 b" delta-base=%d"
1624 b" delta-base=%d"
1622 b" is-cached=%d"
1625 b" is-cached=%d"
1623 b" - search-rounds=%d"
1626 b" - search-rounds=%d"
1624 b" try-count=%d"
1627 b" try-count=%d"
1625 b" - delta-type=%-6s"
1628 b" - delta-type=%-6s"
1626 b" snap-depth=%d"
1629 b" snap-depth=%d"
1627 b" - p1-chain-length=%d"
1630 b" - p1-chain-length=%d"
1628 b" p2-chain-length=%d"
1631 b" p2-chain-length=%d"
1629 b" - duration=%f"
1632 b" - duration=%f"
1630 b"\n"
1633 b"\n"
1631 )
1634 )
1632 msg %= (
1635 msg %= (
1633 dbg["target-revlog"],
1636 dbg["target-revlog"],
1634 dbg["revision"],
1637 dbg["revision"],
1635 dbg["delta-base"],
1638 dbg["delta-base"],
1636 dbg["using-cached-base"],
1639 dbg["using-cached-base"],
1637 dbg["search_round_count"],
1640 dbg["search_round_count"],
1638 dbg["delta_try_count"],
1641 dbg["delta_try_count"],
1639 dbg["type"],
1642 dbg["type"],
1640 dbg["snapshot-depth"],
1643 dbg["snapshot-depth"],
1641 dbg["p1-chain-len"],
1644 dbg["p1-chain-len"],
1642 dbg["p2-chain-len"],
1645 dbg["p2-chain-len"],
1643 dbg["duration"],
1646 dbg["duration"],
1644 )
1647 )
1645 self._write_debug(msg)
1648 self._write_debug(msg)
1646
1649
1647
1650
def delta_compression(default_compression_header, deltainfo):
    """return (COMPRESSION_MODE, deltainfo)

    used by revlog v2+ format to dispatch between PLAIN and DEFAULT
    compression.

    ``default_compression_header`` is the single-byte marker emitted by the
    revlog's default compressor; a payload starting with that byte can be
    stored with the cheaper COMP_MODE_DEFAULT marker instead of an inline
    compression header.
    """
    h, d = deltainfo.data
    compression_mode = COMP_MODE_INLINE
    if not h and not d:
        # not data to store at all... declare them uncompressed
        compression_mode = COMP_MODE_PLAIN
    elif not h:
        # no compression header: inspect the payload's first byte to see
        # whether it is stored raw or with the default compressor
        t = d[0:1]
        if t == b'\0':
            compression_mode = COMP_MODE_PLAIN
        elif t == default_compression_header:
            compression_mode = COMP_MODE_DEFAULT
    elif h == b'u':
        # we have a more efficient way to declare uncompressed
        h = b''
        compression_mode = COMP_MODE_PLAIN
        deltainfo = drop_u_compression(deltainfo)
    return compression_mode, deltainfo
General Comments 0
You need to be logged in to leave comments. Login now