##// END OF EJS Templates
delta-search: fix crash caused by unbound variable...
Raphaël Gomès -
r52402:e7bbeaa4 stable
parent child Browse files
Show More
@@ -1,1883 +1,1884 b''
1 # revlogdeltas.py - Logic around delta computation for revlog
1 # revlogdeltas.py - Logic around delta computation for revlog
2 #
2 #
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 # Copyright 2018 Octobus <contact@octobus.net>
4 # Copyright 2018 Octobus <contact@octobus.net>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8 """Helper class to compute deltas stored inside revlogs"""
8 """Helper class to compute deltas stored inside revlogs"""
9
9
10
10
11 import abc
11 import abc
12 import collections
12 import collections
13 import struct
13 import struct
14
14
15 # import stuff from node for others to import from revlog
15 # import stuff from node for others to import from revlog
16 from ..node import nullrev
16 from ..node import nullrev
17 from ..i18n import _
17 from ..i18n import _
18
18
19 from .constants import (
19 from .constants import (
20 COMP_MODE_DEFAULT,
20 COMP_MODE_DEFAULT,
21 COMP_MODE_INLINE,
21 COMP_MODE_INLINE,
22 COMP_MODE_PLAIN,
22 COMP_MODE_PLAIN,
23 DELTA_BASE_REUSE_FORCE,
23 DELTA_BASE_REUSE_FORCE,
24 DELTA_BASE_REUSE_NO,
24 DELTA_BASE_REUSE_NO,
25 KIND_CHANGELOG,
25 KIND_CHANGELOG,
26 KIND_FILELOG,
26 KIND_FILELOG,
27 KIND_MANIFESTLOG,
27 KIND_MANIFESTLOG,
28 REVIDX_ISCENSORED,
28 REVIDX_ISCENSORED,
29 REVIDX_RAWTEXT_CHANGING_FLAGS,
29 REVIDX_RAWTEXT_CHANGING_FLAGS,
30 )
30 )
31
31
32 from ..thirdparty import attr
32 from ..thirdparty import attr
33
33
34 from .. import (
34 from .. import (
35 error,
35 error,
36 mdiff,
36 mdiff,
37 util,
37 util,
38 )
38 )
39
39
40 from . import flagutil
40 from . import flagutil
41
41
42 # maximum <delta-chain-data>/<revision-text-length> ratio
42 # maximum <delta-chain-data>/<revision-text-length> ratio
43 LIMIT_DELTA2TEXT = 2
43 LIMIT_DELTA2TEXT = 2
44
44
45
45
class _testrevlog:
    """Minimal stand-in for a real revlog, used by the doctests below."""

    def __init__(self, data, density=0.5, mingap=0, snapshot=()):
        """``data`` is a list of cumulative revision payload boundaries"""
        from .. import revlog

        self._data = data
        self.data_config = revlog.DataConfig()
        self.data_config.sr_density_threshold = density
        self.data_config.sr_min_gap_size = mingap
        self.delta_config = revlog.DeltaConfig()
        self.feature_config = revlog.FeatureConfig()
        self._snapshot = set(snapshot)
        self.index = None

    def start(self, rev):
        # both the null revision and revision 0 start at offset zero
        if rev in (nullrev, 0):
            return 0
        return self._data[rev - 1]

    def end(self, rev):
        return 0 if rev == nullrev else self._data[rev]

    def length(self, rev):
        return self.end(rev) - self.start(rev)

    def __len__(self):
        return len(self._data)

    def issnapshot(self, rev):
        # the null revision always counts as a snapshot
        return rev == nullrev or rev in self._snapshot
84
84
85
85
def slicechunk(revlog, revs, targetsize=None):
    """Split ``revs`` into groups to limit unrelated data read from disk.

    Each yielded group is meant to be read in a single operation. ``revs``
    must be sorted.

    Groups are split until every one's density (payload / chunk-span ratio)
    is above `revlog.data_config.sr_density_threshold`; gaps smaller than
    `revlog.data_config.sr_min_gap_size` are never skipped.

    When `targetsize` is given, no yielded chunk is larger than it. For
    consistency with the other slicing criteria, that limit is never lowered
    below `revlog.data_config.sr_min_gap_size`.

    A single revision larger than the limit is still yielded, on its own.

    >>> data = [
    ...  5,  #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ]
    >>> revlog = _testrevlog(data, snapshot=range(16))

    >>> list(slicechunk(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(slicechunk(revlog, [0, 15]))
    [[0], [15]]
    >>> list(slicechunk(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]

    Slicing with a maximum chunk size
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
    [[0], [11], [13], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
    [[0], [11], [13, 15]]

    Slicing involving nullrev
    >>> list(slicechunk(revlog, [-1, 0, 11, 13, 15], targetsize=20))
    [[-1, 0], [11], [13, 15]]
    >>> list(slicechunk(revlog, [-1, 13, 15], targetsize=5))
    [[-1], [13], [15]]
    """
    if targetsize is not None:
        targetsize = max(targetsize, revlog.data_config.sr_min_gap_size)
    # targetsize should not be specified when evaluating delta candidates:
    # * targetsize is used to ensure we stay within specification when reading,
    density_fn = getattr(revlog.index, 'slicechunktodensity', None)
    if density_fn is None:
        # no native implementation available: fall back to the Python one
        def density_fn(subrevs, density, mingap):
            return _slicechunktodensity(revlog, subrevs, density, mingap)

    density_threshold = revlog.data_config.sr_density_threshold
    min_gap = revlog.data_config.sr_min_gap_size
    for chunk in density_fn(revs, density_threshold, min_gap):
        # density-based groups may still be too big for one read; re-split
        yield from _slicechunktosize(revlog, chunk, targetsize)
160
160
161
161
def _slicechunktosize(revlog, revs, targetsize=None):
    """slice revs to match the target size

    This is intended to be used on chunk that density slicing selected by that
    are still too large compared to the read garantee of revlog. This might
    happens when "minimal gap size" interrupted the slicing or when chain are
    built in a way that create large blocks next to each other.

    >>> data = [
    ...  3,  #0 (3)
    ...  5,  #1 (2)
    ...  6,  #2 (1)
    ...  8,  #3 (2)
    ...  8,  #4 (empty)
    ...  11, #5 (3)
    ...  12, #6 (1)
    ...  13, #7 (1)
    ...  14, #8 (1)
    ... ]

    == All snapshots cases ==
    >>> revlog = _testrevlog(data, snapshot=range(9))

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1, 2], [3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]

    == No Snapshot cases ==
    >>> revlog = _testrevlog(data)

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1], [2, 3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [4, 5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]

    == mixed case ==
    >>> revlog = _testrevlog(data, snapshot=[0, 1, 2])
    >>> list(_slicechunktosize(revlog, list(range(9)), 5))
    [[0, 1], [2], [3, 4, 5], [6, 7, 8]]
    """
    assert targetsize is None or 0 <= targetsize
    startdata = revlog.start(revs[0])
    enddata = revlog.end(revs[-1])
    fullspan = enddata - startdata
    # fast path: the whole span already fits (or no limit was requested)
    if targetsize is None or fullspan <= targetsize:
        yield revs
        return

    startrevidx = 0
    endrevidx = 1
    iterrevs = enumerate(revs)
    next(iterrevs)  # skip first rev.
    # first step: get snapshots out of the way
    for idx, r in iterrevs:
        span = revlog.end(r) - startdata
        snapshot = revlog.issnapshot(r)
        if span <= targetsize and snapshot:
            # this snapshot still fits in the current chunk: extend it
            endrevidx = idx + 1
        else:
            # flush the chunk accumulated so far and restart at this rev
            chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
            if chunk:
                yield chunk
            startrevidx = idx
            startdata = revlog.start(r)
            endrevidx = idx + 1
        if not snapshot:
            # the first non-snapshot ends this phase; the rest is handled
            # by the binary slicing below
            break

    # for the others, we use binary slicing to quickly converge toward valid
    # chunks (otherwise, we might end up looking for start/end of many
    # revisions). This logic is not looking for the perfect slicing point, it
    # focuses on quickly converging toward valid chunks.
    nbitem = len(revs)
    while (enddata - startdata) > targetsize:
        endrevidx = nbitem
        if nbitem - startrevidx <= 1:
            break  # protect against individual chunk larger than limit
        localenddata = revlog.end(revs[endrevidx - 1])
        span = localenddata - startdata
        # bisect the end index down until the candidate span fits
        while span > targetsize:
            if endrevidx - startrevidx <= 1:
                break  # protect against individual chunk larger than limit
            endrevidx -= (endrevidx - startrevidx) // 2
            localenddata = revlog.end(revs[endrevidx - 1])
            span = localenddata - startdata
        chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
        if chunk:
            yield chunk
        # next chunk starts right after the one we just emitted
        startrevidx = endrevidx
        startdata = revlog.start(revs[startrevidx])

    # emit whatever remains after the last cut
    chunk = _trimchunk(revlog, revs, startrevidx)
    if chunk:
        yield chunk
315
315
316
316
def _slicechunktodensity(revlog, revs, targetdensity=0.5, mingapsize=0):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
    skipped.

    >>> revlog = _testrevlog([
    ...  5,  #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ])

    >>> list(_slicechunktodensity(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(_slicechunktodensity(revlog, [0, 15]))
    [[0], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           mingapsize=20))
    [[1, 2, 3, 5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95))
    [[1, 2], [5], [8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95, mingapsize=12))
    [[1, 2], [5, 8, 10, 11], [14]]
    """
    start = revlog.start
    length = revlog.length

    # zero or one revision: nothing to slice
    if len(revs) <= 1:
        yield revs
        return

    deltachainspan = segmentspan(revlog, revs)

    # the whole read is smaller than the minimum gap we would skip anyway
    if deltachainspan < mingapsize:
        yield revs
        return

    readdata = deltachainspan
    chainpayload = sum(length(r) for r in revs)

    if deltachainspan:
        density = chainpayload / float(deltachainspan)
    else:
        density = 1.0

    # dense enough already: keep the whole group as-is
    if density >= targetdensity:
        yield revs
        return

    # Collect the gaps between consecutive non-empty revisions; the list is
    # sorted afterward so the largest gaps can be popped off the end first.
    gaps = []
    prevend = None
    for i, rev in enumerate(revs):
        revstart = start(rev)
        revlen = length(rev)

        # Skip empty revisions to form larger holes
        if revlen == 0:
            continue

        if prevend is not None:
            gapsize = revstart - prevend
            # only consider holes that are large enough
            if gapsize > mingapsize:
                gaps.append((gapsize, i))

        prevend = revstart + revlen
    # sort the gaps to pop them from largest to small
    gaps.sort()

    # Collect the indices of the largest holes until the density is acceptable
    selected = []
    while gaps and density < targetdensity:
        gapsize, gapidx = gaps.pop()

        selected.append(gapidx)

        # skipping this gap shrinks the data we would actually read, which
        # raises the density of what remains
        readdata -= gapsize
        if readdata > 0:
            density = chainpayload / float(readdata)
        else:
            density = 1.0
    selected.sort()

    # Cut the revs at collected indices
    previdx = 0
    for idx in selected:

        chunk = _trimchunk(revlog, revs, previdx, idx)
        if chunk:
            yield chunk

        previdx = idx

    chunk = _trimchunk(revlog, revs, previdx)
    if chunk:
        yield chunk
441
441
442
442
443 def _trimchunk(revlog, revs, startidx, endidx=None):
443 def _trimchunk(revlog, revs, startidx, endidx=None):
444 """returns revs[startidx:endidx] without empty trailing revs
444 """returns revs[startidx:endidx] without empty trailing revs
445
445
446 Doctest Setup
446 Doctest Setup
447 >>> revlog = _testrevlog([
447 >>> revlog = _testrevlog([
448 ... 5, #0
448 ... 5, #0
449 ... 10, #1
449 ... 10, #1
450 ... 12, #2
450 ... 12, #2
451 ... 12, #3 (empty)
451 ... 12, #3 (empty)
452 ... 17, #4
452 ... 17, #4
453 ... 21, #5
453 ... 21, #5
454 ... 21, #6 (empty)
454 ... 21, #6 (empty)
455 ... ])
455 ... ])
456
456
457 Contiguous cases:
457 Contiguous cases:
458 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
458 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
459 [0, 1, 2, 3, 4, 5]
459 [0, 1, 2, 3, 4, 5]
460 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
460 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
461 [0, 1, 2, 3, 4]
461 [0, 1, 2, 3, 4]
462 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
462 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
463 [0, 1, 2]
463 [0, 1, 2]
464 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
464 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
465 [2]
465 [2]
466 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
466 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
467 [3, 4, 5]
467 [3, 4, 5]
468 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
468 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
469 [3, 4]
469 [3, 4]
470
470
471 Discontiguous cases:
471 Discontiguous cases:
472 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
472 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
473 [1, 3, 5]
473 [1, 3, 5]
474 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
474 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
475 [1]
475 [1]
476 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
476 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
477 [3, 5]
477 [3, 5]
478 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
478 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
479 [3, 5]
479 [3, 5]
480 """
480 """
481 length = revlog.length
481 length = revlog.length
482
482
483 if endidx is None:
483 if endidx is None:
484 endidx = len(revs)
484 endidx = len(revs)
485
485
486 # If we have a non-emtpy delta candidate, there are nothing to trim
486 # If we have a non-emtpy delta candidate, there are nothing to trim
487 if revs[endidx - 1] < len(revlog):
487 if revs[endidx - 1] < len(revlog):
488 # Trim empty revs at the end, except the very first revision of a chain
488 # Trim empty revs at the end, except the very first revision of a chain
489 while (
489 while (
490 endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0
490 endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0
491 ):
491 ):
492 endidx -= 1
492 endidx -= 1
493
493
494 return revs[startidx:endidx]
494 return revs[startidx:endidx]
495
495
496
496
def segmentspan(revlog, revs):
    """Get the byte span of a segment of revisions

    revs is a sorted array of revision numbers

    >>> revlog = _testrevlog([
    ...  5,  #0
    ...  10, #1
    ...  12, #2
    ...  12, #3 (empty)
    ...  17, #4
    ... ])

    >>> segmentspan(revlog, [0, 1, 2, 3, 4])
    17
    >>> segmentspan(revlog, [0, 4])
    17
    >>> segmentspan(revlog, [3, 4])
    5
    >>> segmentspan(revlog, [1, 2, 3,])
    7
    >>> segmentspan(revlog, [1, 3])
    7
    """
    # an empty segment covers no bytes at all
    if not revs:
        return 0
    # distance from the first revision's start to the last revision's end
    return revlog.end(revs[-1]) - revlog.start(revs[0])
525
525
526
526
def _textfromdelta(revlog, baserev, delta, p1, p2, flags, expectednode):
    """Rebuild a revision's full text from a (base, delta) pair and metadata.

    Validates the result against ``expectednode`` (via hash checking) and
    enforces the censorship flag contract before returning the full text.
    """
    header_size = struct.calcsize(b">lll")
    whole_replace = mdiff.replacediffheader(
        revlog.rawsize(baserev), len(delta) - header_size
    )
    if delta[:header_size] == whole_replace:
        # The delta replaces the entire base, so the base revision never
        # needs decoding — which neatly sidesteps censored bases (those
        # raise when decoded).
        fulltext = delta[header_size:]
    else:
        # The delta base is the rawtext from before flag processors ran,
        # which is equivalent to the non-raw text here.
        fulltext = mdiff.patch(revlog.revision(baserev), delta)

    try:
        if flagutil.processflagsraw(revlog, fulltext, flags):
            revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
        if flags & REVIDX_ISCENSORED:
            raise error.StorageError(
                _(b'node %s is not censored') % expectednode
            )
    except error.CensoredNodeError:
        # the censored index flag must be passed to add censored revisions
        if not flags & REVIDX_ISCENSORED:
            raise
    return fulltext
556
556
557
557
@attr.s(slots=True, frozen=True)
class _deltainfo:
    """Immutable description of one candidate delta for a revision."""

    # distance from the base revision, i.e. the span of data that must be
    # read to restore the text (bounded to limit I/O)
    distance = attr.ib()
    # size of the stored delta
    deltalen = attr.ib()
    # the delta payload as a (header, body) pair of bytes
    data = attr.ib()
    # revision number used as delta base
    base = attr.ib()
    # first revision of the delta chain — presumably the chain's full
    # snapshot; TODO confirm against revlog.chainbase
    chainbase = attr.ib()
    # number of deltas in the chain ending with this delta
    chainlen = attr.ib()
    # cumulated size of all compressed deltas that must be applied to
    # restore the text (bounds CPU usage)
    compresseddeltalen = attr.ib()
    # depth of the snapshot when this delta is a snapshot, None otherwise
    snapshotdepth = attr.ib()
568
568
569
569
def drop_u_compression(delta):
    """Return a copy of ``delta`` with the b'u' (no-compression) header dropped.

    This is useful for revlog formats that have a better compression method:
    the payload stays uncompressed, but no longer carries the explicit
    "uncompressed" marker byte.
    """
    assert delta.data[0] == b'u', delta.data[0]
    payload = delta.data[1]
    return _deltainfo(
        distance=delta.distance,
        # one byte shorter: the b'u' marker is gone
        deltalen=delta.deltalen - 1,
        data=(b'', payload),
        base=delta.base,
        chainbase=delta.chainbase,
        chainlen=delta.chainlen,
        compresseddeltalen=delta.compresseddeltalen,
        snapshotdepth=delta.snapshotdepth,
    )
586
586
587
587
# If a revision's full text is that much bigger than a base candidate full
# text's, it is very unlikely that it will produce a valid delta. We no longer
# consider these candidates.
LIMIT_BASE2TEXT = 500

### Stages of the search, used for debugging and to select/adjust some logic.
# initial stage, next step is unknown
_STAGE_UNSPECIFIED = "unspecified"
# trying the delta provided by a cache (e.g. from a bundle)
_STAGE_CACHED = "cached"
# trying deltas based on the parents
_STAGE_PARENTS = "parents"
# trying to build a valid snapshot of any level
_STAGE_SNAPSHOT = "snapshot"
# trying to build a delta based on the previous revision
_STAGE_PREV = "prev"
# trying to build a full snapshot
_STAGE_FULL = "full"
606
606
607
607
class _BaseDeltaSearch(abc.ABC):
    """perform the search of a good delta for a single revlog revision

    note: some of the deltacomputer.finddeltainfo logic should probably move
    here.
    """

    def __init__(
        self,
        revlog,
        revinfo,
        p1,
        p2,
        cachedelta,
        excluded_bases=None,
        target_rev=None,
        snapshot_cache=None,
    ):
        """
        :revlog: the revlog to search a delta base in
        :revinfo: information about the revision being stored (must provide
            ``textlen`` and ``cachedelta``)
        :p1:/:p2: parent revisions of the incoming revision
        :cachedelta: optional ``(base-rev, delta, reuse-policy)`` tuple coming
            from a bundle or a source revlog
        :excluded_bases: optional container of revisions that must never be
            used as delta base (e.g. censored revisions)
        :target_rev: the revision number the delta is computed for; defaults
            to the next revision to be appended to ``revlog``
        :snapshot_cache: optional SnapshotCache shared across searches
        """
        # the DELTA_BASE_REUSE_FORCE case should have been taken care of sooner
        # so we should never end up asking such question. Adding the assert as
        # a safe-guard to detect anything that would be fishy in this regard.
        assert (
            cachedelta is None
            or cachedelta[2] != DELTA_BASE_REUSE_FORCE
            or not revlog.delta_config.general_delta
        )
        self.revlog = revlog
        self.revinfo = revinfo
        self.textlen = revinfo.textlen
        self.p1 = p1
        self.p2 = p2
        self.cachedelta = cachedelta
        self.excluded_bases = excluded_bases
        if target_rev is None:
            # default to the revision about to be appended.
            #
            # Fix: the previous code set `self.target_rev` here and then
            # unconditionally overwrote it with the `target_rev` parameter,
            # leaving `self.target_rev = None` and crashing users that do
            # arithmetic on it (e.g. `self.target_rev - 1`).
            target_rev = len(self.revlog)
        self.target_rev = target_rev
        if snapshot_cache is None:
            # map: base-rev: [snapshot-revs]
            snapshot_cache = SnapshotCache()
        self.snapshot_cache = snapshot_cache

        # revisions already evaluated (nullrev is handled as a last resort)
        self.tested = {nullrev}

        self.current_stage = _STAGE_UNSPECIFIED
        self.current_group = None
        self._init_group()

    def is_good_delta_info(self, deltainfo):
        """Returns True if the given delta is good.

        Good means that it is within the disk span, disk size, and chain length
        bounds that we know to be performant.
        """
        if not self._is_good_delta_info_universal(deltainfo):
            return False
        if not self._is_good_delta_info_chain_quality(deltainfo):
            return False
        return True

    def _is_good_delta_info_universal(self, deltainfo):
        """Returns True if the given delta is good.

        This performs generic checks needed by all format variants.

        This is used by is_good_delta_info.
        """

        if deltainfo is None:
            return False

        # the DELTA_BASE_REUSE_FORCE case should have been taken care of sooner
        # so we should never end up asking such question. Adding the assert as
        # a safe-guard to detect anything that would be fishy in this regard.
        assert (
            self.revinfo.cachedelta is None
            or self.revinfo.cachedelta[2] != DELTA_BASE_REUSE_FORCE
            or not self.revlog.delta_config.general_delta
        )

        # Bad delta from new delta size:
        #
        # If the delta size is larger than the target text, storing the delta
        # will be inefficient.
        if self.revinfo.textlen < deltainfo.deltalen:
            return False

        return True

    def _is_good_delta_info_chain_quality(self, deltainfo):
        """Returns True if the chain associated with the delta is good.

        This performs checks for format that use delta chains.

        This is used by is_good_delta_info.
        """
        # - 'deltainfo.distance' is the distance from the base revision --
        #   bounding it limits the amount of I/O we need to do.

        defaultmax = self.revinfo.textlen * 4
        maxdist = self.revlog.delta_config.max_deltachain_span
        if not maxdist:
            maxdist = deltainfo.distance  # ensure the conditional pass
        maxdist = max(maxdist, defaultmax)

        # Bad delta from read span:
        #
        # If the span of data read is larger than the maximum allowed.
        #
        # In the sparse-revlog case, we rely on the associated "sparse
        # reading" to avoid issue related to the span of data. In theory, it
        # would be possible to build pathological revlog where delta pattern
        # would lead to too many reads. However, they do not happen in
        # practice at all. So we skip the span check entirely.
        if (
            not self.revlog.delta_config.sparse_revlog
            and maxdist < deltainfo.distance
        ):
            return False

        # Bad delta from cumulated payload size:
        #
        # - 'deltainfo.compresseddeltalen' is the sum of the total size of
        #   deltas we need to apply -- bounding it limits the amount of CPU
        #   we consume.
        max_chain_data = self.revinfo.textlen * LIMIT_DELTA2TEXT
        # If the sum of delta get larger than K * target text length.
        if max_chain_data < deltainfo.compresseddeltalen:
            return False

        # Bad delta from chain length:
        #
        # If the number of delta in the chain gets too high.
        if (
            self.revlog.delta_config.max_chain_len
            and self.revlog.delta_config.max_chain_len < deltainfo.chainlen
        ):
            return False
        return True

    @property
    def done(self):
        """True when all possible candidate have been tested"""
        return self.current_group is None

    @abc.abstractmethod
    def next_group(self, good_delta=None):
        """move to the next group to test

        The group of revision to test will be available in
        `self.current_group`.  If the previous group had any good delta, the
        best one can be passed as the `good_delta` parameter to help selecting
        the next group.

        If not revision remains to be, `self.done` will be True and
        `self.current_group` will be None.
        """
        pass

    @abc.abstractmethod
    def _init_group(self):
        """initialize the first candidate group (called once from __init__)"""
        pass
769
769
770
770
class _NoDeltaSearch(_BaseDeltaSearch):
    """Delta search that never looks for a delta.

    To be used when deltas should not be stored at all: every revision is
    kept as a full snapshot.
    """

    def _init_group(self):
        # no candidates to iterate over: start (and stay) at the
        # "store a full snapshot" stage.
        self.current_stage = _STAGE_FULL

    def next_group(self, good_delta=None):
        """There is never a candidate group, hence never a next one."""
782
782
783
783
class _PrevDeltaSearch(_BaseDeltaSearch):
    """Delta search restricted to the immediately preceding revision.

    To be used when the format does not allow deltas against arbitrary
    bases (i.e. no general-delta support).
    """

    def _init_group(self):
        # the one and only candidate: the previous revision
        self.current_stage = _STAGE_PREV
        prev = self.target_rev - 1
        self.current_group = [prev]
        self.tested.add(prev)

    def next_group(self, good_delta=None):
        """Once `prev` has been tried, only a full snapshot remains."""
        self.current_stage = _STAGE_FULL
        self.current_group = None
799
799
800
800
class _GeneralDeltaSearch(_BaseDeltaSearch):
    """Delta search variant for general-delta repository"""

    def _init_group(self):
        """set up the first group of candidate bases

        A usable cached delta (e.g. shipped in a bundle) gets its own
        dedicated stage and is tried first; otherwise we immediately pull
        the first group out of the candidate iterator.
        """
        # Why search for delta base if we cannot use a delta base ?
        # also see issue6056
        assert self.revlog.delta_config.general_delta
        # coroutine producing candidate groups; we `send` it the best delta
        # found so far to guide the next group it yields
        self._candidates_iterator = self._iter_groups()
        self._last_good = None
        if (
            self.cachedelta is not None
            and self.cachedelta[2] > DELTA_BASE_REUSE_NO
            and self._pre_filter_rev(self.cachedelta[0])
        ):
            # First we try to reuse the delta contained in the bundle. (or
            # from the source revlog)
            #
            # This logic only applies to general delta repositories and can be
            # disabled through configuration. Disabling reuse source delta is
            # useful when we want to make sure we recomputed "optimal" deltas.
            self.current_stage = _STAGE_CACHED
            self._internal_group = (self.cachedelta[0],)
            self._internal_idx = 0
            self.current_group = self._internal_group
            self.tested.update(self.current_group)
        else:
            self._next_internal_group()

    def _next_internal_group(self):
        """pull the next candidate group from the iterator

        The new group is pre-filtered and, when chunking is configured,
        only its first chunk is exposed through ``self.current_group``.
        """
        # self._internal_group can be larger than self.current_group
        self._internal_idx = 0
        group = self._candidates_iterator.send(self._last_good)
        if group is not None:
            group = self._pre_filter_candidate_revs(group)
        self._internal_group = group
        if self._internal_group is None:
            # iterator exhausted: the search is over
            self.current_group = None
        elif len(self._internal_group) == 0:
            # every candidate was filtered out, move on to the next group
            self.next_group()
        else:
            chunk_size = self.revlog.delta_config.candidate_group_chunk_size
            if chunk_size > 0:
                self.current_group = self._internal_group[:chunk_size]
                self._internal_idx += chunk_size
            else:
                self.current_group = self._internal_group
                self._internal_idx += len(self.current_group)

            self.tested.update(self.current_group)

    def next_group(self, good_delta=None):
        """advance to the next candidate group (see base class docstring)"""
        old_good = self._last_good
        if good_delta is not None:
            self._last_good = good_delta
        if self.current_stage == _STAGE_CACHED and good_delta is not None:
            # the cache is good, let us use the cache as requested
            self._candidates_iterator = None
            self._internal_group = None
            self._internal_idx = None
            self.current_group = None
            return

        if (self._internal_idx < len(self._internal_group)) and (
            old_good != good_delta
        ):
            # When the size of the candidate group is big, it can result in
            # a quite significant performance impact. To reduce this, we
            # can send them in smaller batches until the new batch does not
            # provide any improvements.
            #
            # This might reduce the overall efficiency of the compression
            # in some corner cases, but that should also prevent very
            # pathological cases from being an issue. (eg. 20 000
            # candidates).
            #
            # XXX note that the ordering of the group becomes important as
            # it now impacts the final result. The current order is
            # unprocessed and can be improved.
            chunk_size = self.revlog.delta_config.candidate_group_chunk_size
            next_idx = self._internal_idx + chunk_size
            self.current_group = self._internal_group[
                self._internal_idx : next_idx
            ]
            self.tested.update(self.current_group)
            self._internal_idx = next_idx
        else:
            self._next_internal_group()

    def _pre_filter_candidate_revs(self, temptative):
        """filter possible candidate before computing a delta

        This function use various criteria to pre-filter candidate delta base
        before we compute a delta and evaluate its quality.

        Such pre-filter limit the number of computed delta, an expensive
        operation.

        return the updated list of revision to test
        """
        deltalength = self.revlog.length
        deltaparent = self.revlog.deltaparent

        tested = self.tested
        group = []
        for rev in temptative:
            # skip over empty delta (no need to include them in a chain)
            while not (rev == nullrev or rev in tested or deltalength(rev)):
                tested.add(rev)
                rev = deltaparent(rev)
            if self._pre_filter_rev(rev):
                group.append(rev)
            else:
                self.tested.add(rev)
        return group

    def _pre_filter_rev_universal(self, rev):
        """pre filtering that is needed in all cases.

        return True if it seems okay to test a rev, False otherwise.

        used by _pre_filter_rev.
        """
        # no need to try a delta against nullrev, this will be done as
        # a last resort.
        if rev == nullrev:
            return False
        # filter out revision we tested already
        if rev in self.tested:
            return False

        # a higher authority deemed the base unworthy (e.g. censored)
        if self.excluded_bases is not None and rev in self.excluded_bases:
            return False
        # We are in some recomputation cases and that rev is too high
        # in the revlog
        if self.target_rev is not None and rev >= self.target_rev:
            return False
        # no delta for rawtext-changing revs (see "candelta" for why)
        if self.revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
            return False
        return True

    def _pre_filter_rev_delta_chain(self, rev):
        """pre filtering based on the delta chain the base belongs to

        return True if it seems okay to test a rev, False otherwise.

        used by _pre_filter_rev.
        """
        deltas_limit = self.revinfo.textlen * LIMIT_DELTA2TEXT
        # filter out delta base that will never produce good delta
        #
        # if the delta of that base is already bigger than the limit
        # for the delta chain size, doing a delta is hopeless.
        if deltas_limit < self.revlog.length(rev):
            return False

        # If we reach here, we are about to build and test a delta.
        # The delta building process will compute the chaininfo in all
        # case, since that computation is cached, it is fine to access
        # it here too.
        chainlen, chainsize = self.revlog._chaininfo(rev)
        # if chain will be too long, skip base
        if (
            self.revlog.delta_config.max_chain_len
            and chainlen >= self.revlog.delta_config.max_chain_len
        ):
            return False
        # if chain already have too much data, skip base
        if deltas_limit < chainsize:
            return False
        return True

    def _pre_filter_rev(self, rev):
        """return True if it seems okay to test a rev, False otherwise"""
        if not self._pre_filter_rev_universal(rev):
            return False
        if not self._pre_filter_rev_delta_chain(rev):
            return False
        return True

    def _iter_parents(self):
        """yield groups made of the incoming revision's parents"""
        # exclude already lazy tested base if any
        parents = [p for p in (self.p1, self.p2) if p != nullrev]

        self.current_stage = _STAGE_PARENTS
        if (
            not self.revlog.delta_config.delta_both_parents
            and len(parents) == 2
        ):
            parents.sort()
            # To minimize the chance of having to build a fulltext,
            # pick first whichever parent is closest to us (max rev)
            yield (parents[1],)
            # then the other one (min rev) if the first did not fit
            yield (parents[0],)
        elif len(parents) > 0:
            # Test all parents (1 or 2), and keep the best candidate
            yield parents

    def _iter_prev(self):
        # other approach failed try against prev to hopefully save us a
        # fulltext.
        self.current_stage = _STAGE_PREV
        yield (self.target_rev - 1,)

    def _iter_groups(self):
        """coroutine yielding candidate groups

        The caller sends back the best delta found in the previous group
        (or None) through ``send``; a final ``None`` marks exhaustion.
        """
        good = None
        for group in self._iter_parents():
            good = yield group
            if good is not None:
                break
        else:
            # no parent produced a good delta: fall back to `prev`
            assert good is None
            yield from self._iter_prev()
        yield None
1015
1016
1016
1017
class _SparseDeltaSearch(_GeneralDeltaSearch):
    """Delta search variants for sparse-revlog"""

    def is_good_delta_info(self, deltainfo):
        """Returns True if the given delta is good.

        Good means that it is within the disk span, disk size, and chain length
        bounds that we know to be performant.
        """
        if not self._is_good_delta_info_universal(deltainfo):
            return False
        if not self._is_good_delta_info_chain_quality(deltainfo):
            return False
        if not self._is_good_delta_info_snapshot_constraints(deltainfo):
            return False
        return True

    def _is_good_delta_info_snapshot_constraints(self, deltainfo):
        """Returns True if the snapshot-related size constraints accept it.

        This performs checks for format that use sparse-revlog and intermediate
        snapshots.

        This is used by is_good_delta_info.
        """
        # if not a snapshot, this method has no filtering to do
        if deltainfo.snapshotdepth is None:
            return True
        # bad delta from intermediate snapshot size limit
        #
        # If an intermediate snapshot size is higher than the limit. The
        # limit exist to prevent endless chain of intermediate delta to be
        # created.  The limit halves with each extra level of depth.
        if (
            self.revinfo.textlen >> deltainfo.snapshotdepth
        ) < deltainfo.deltalen:
            return False

        # bad delta if new intermediate snapshot is larger than the previous
        # snapshot
        if self.revlog.length(deltainfo.base) < deltainfo.deltalen:
            return False

        return True

    def _pre_filter_rev(self, rev):
        """return True if it seems okay to test a rev, False otherwise"""
        if not self._pre_filter_rev_universal(rev):
            return False
        if not self._pre_filter_rev_delta_chain(rev):
            return False
        if not self._pre_filter_rev_sparse(rev):
            return False
        return True

    def _pre_filter_rev_sparse(self, rev):
        """pre filtering that is needed in sparse revlog cases

        return True if it seems okay to test a rev, False otherwise.

        used by _pre_filter_rev.
        """
        assert self.revlog.delta_config.sparse_revlog
        # if the revision we test against is too small, the resulting delta
        # will be large anyway as that amount of data to be added is big
        if self.revlog.rawsize(rev) < (self.textlen // LIMIT_BASE2TEXT):
            return False

        if self.revlog.delta_config.upper_bound_comp is not None:
            maxcomp = self.revlog.delta_config.upper_bound_comp
            basenotsnap = (self.p1, self.p2, nullrev)
            if rev not in basenotsnap and self.revlog.issnapshot(rev):
                snapshotdepth = self.revlog.snapshotdepth(rev)
                # If text is significantly larger than the base, we can
                # expect the resulting delta to be proportional to the size
                # difference
                revsize = self.revlog.rawsize(rev)
                rawsizedistance = max(self.textlen - revsize, 0)
                # use an estimate of the compression upper bound.
                lowestrealisticdeltalen = rawsizedistance // maxcomp

                # check the absolute constraint on the delta size
                snapshotlimit = self.textlen >> snapshotdepth
                if snapshotlimit < lowestrealisticdeltalen:
                    # delta lower bound is larger than accepted upper
                    # bound
                    return False

                # check the relative constraint on the delta size
                revlength = self.revlog.length(rev)
                if revlength < lowestrealisticdeltalen:
                    # delta probable lower bound is larger than target
                    # base
                    return False
        return True

    def _iter_snapshots_base(self):
        """Yield tuples of candidate base revisions for a new intermediate
        snapshot, from the most promising group to the least.

        Candidates come from snapshots in the parents' delta chains, from
        an "unrelated" chain's snapshots, and finally from the full
        snapshots recorded after the parents' chain bases.
        """
        assert self.revlog.delta_config.sparse_revlog
        assert self.current_stage == _STAGE_SNAPSHOT
        prev = self.target_rev - 1
        deltachain = lambda rev: self.revlog._deltachain(rev)[0]

        parents = [p for p in (self.p1, self.p2) if p != nullrev]
        if not parents:
            # no parent, so no parent-chain snapshot to seed the search with
            return
        # See if we can use an existing snapshot in the parent chains to
        # use as a base for a new intermediate-snapshot
        #
        # search for snapshot in parents delta chain map: snapshot-level:
        # snapshot-rev
        parents_snaps = collections.defaultdict(set)
        candidate_chains = [deltachain(p) for p in parents]
        for chain in candidate_chains:
            for idx, s in enumerate(chain):
                if not self.revlog.issnapshot(s):
                    break
                parents_snaps[idx].add(s)
        # NOTE(review): assumes each parent chain starts with a snapshot so
        # that level 0 is non-empty (min() would raise on an empty set) —
        # confirm chain bases are always full snapshots in sparse-revlog.
        snapfloor = min(parents_snaps[0]) + 1
        self.snapshot_cache.update(self.revlog, snapfloor)
        # search for the highest "unrelated" revision
        #
        # Adding snapshots used by "unrelated" revision increase the odd we
        # reuse an independant, yet better snapshot chain.
        #
        # XXX instead of building a set of revisions, we could lazily
        # enumerate over the chains. That would be more efficient, however
        # we stick to simple code for now.
        all_revs = set()
        for chain in candidate_chains:
            all_revs.update(chain)
        other = None
        for r in self.revlog.revs(prev, snapfloor):
            if r not in all_revs:
                other = r
                break
        if other is not None:
            # To avoid unfair competition, we won't use unrelated
            # intermediate snapshot that are deeper than the ones from the
            # parent delta chain.
            max_depth = max(parents_snaps.keys())
            chain = deltachain(other)
            for depth, s in enumerate(chain):
                if s < snapfloor:
                    continue
                if max_depth < depth:
                    break
                if not self.revlog.issnapshot(s):
                    break
                parents_snaps[depth].add(s)
        # Test them as possible intermediate snapshot base We test them
        # from highest to lowest level. High level one are more likely to
        # result in small delta
        floor = None
        for idx, snaps in sorted(parents_snaps.items(), reverse=True):
            siblings = set()
            for s in snaps:
                siblings.update(self.snapshot_cache.snapshots[s])
            # Before considering making a new intermediate snapshot, we
            # check if an existing snapshot, children of base we consider,
            # would be suitable.
            #
            # It give a change to reuse a delta chain "unrelated" to the
            # current revision instead of starting our own. Without such
            # re-use, topological branches would keep reopening new chains.
            # Creating more and more snapshot as the repository grow.

            if floor is not None:
                # We only do this for siblings created after the one in our
                # parent's delta chain. Those created before has less
                # chances to be valid base since our ancestors had to
                # create a new snapshot.
                siblings = [r for r in siblings if floor < r]
            yield tuple(sorted(siblings))
            # then test the base from our parent's delta chain.
            yield tuple(sorted(snaps))
            floor = min(snaps)
        # No suitable base found in the parent chain, search if any full
        # snapshots emitted since parent's base would be a suitable base
        # for an intermediate snapshot.
        #
        # It give a chance to reuse a delta chain unrelated to the current
        # revisions instead of starting our own. Without such re-use,
        # topological branches would keep reopening new full chains.
        # Creating more and more snapshot as the repository grow.
        full = [
            r for r in self.snapshot_cache.snapshots[nullrev] if snapfloor <= r
        ]
        yield tuple(sorted(full))

    def _iter_snapshots(self):
        """Generator driven through send(): yields candidate groups and
        receives the chosen delta info (or None).

        Once a snapshot base is accepted, refine it downward (walking
        deltaparent toward the chain base) then upward (testing the
        accepted base's snapshot children), before yielding a final None.
        """
        assert self.revlog.delta_config.sparse_revlog
        self.current_stage = _STAGE_SNAPSHOT
        good = None
        groups = self._iter_snapshots_base()
        for candidates in groups:
            good = yield candidates
            if good is not None:
                break
        # if we have a refinable value, try to refine it
        if good is not None and good.snapshotdepth is not None:
            assert self.current_stage == _STAGE_SNAPSHOT
            # refine snapshot down
            previous = None
            while previous != good:
                previous = good
                base = self.revlog.deltaparent(good.base)
                if base == nullrev:
                    break
                good = yield (base,)
            # refine snapshot up
            if not self.snapshot_cache.snapshots:
                self.snapshot_cache.update(self.revlog, good.base + 1)
            previous = None
            while good != previous:
                previous = good
                children = tuple(
                    sorted(c for c in self.snapshot_cache.snapshots[good.base])
                )
                good = yield children
        yield None

    def _iter_groups(self):
        """Yield groups of candidate delta bases: parents first, then the
        snapshot-based refinement search; terminates with a final None."""
        good = None
        for group in self._iter_parents():
            good = yield group
            if good is not None:
                break
        else:
            assert good is None
            assert self.revlog.delta_config.sparse_revlog
            # If sparse revlog is enabled, we can try to refine the
            # available deltas
            iter_snap = self._iter_snapshots()
            group = iter_snap.send(None)
            while group is not None:
                good = yield group
                group = iter_snap.send(good)
        yield None
1255
1256
1256
1257
class SnapshotCache:
    """Cache mapping each snapshot's delta-parent to the set of snapshot
    revisions built on top of it.

    Full snapshots are keyed on ``nullrev``.  The cache remembers the
    inclusive revision window it has already scanned and only scans the
    missing portions on later ``update`` calls.
    """

    __slots__ = ('snapshots', '_start_rev', '_end_rev')

    def __init__(self):
        # delta-parent rev -> {snapshot revs based on it}
        self.snapshots = collections.defaultdict(set)
        # inclusive bounds of the already-scanned window; None means
        # nothing has been scanned yet
        self._start_rev = None
        self._end_rev = None

    def update(self, revlog, start_rev=0):
        """find snapshots from start_rev to tip"""
        end_rev = len(revlog) - 1
        if start_rev > end_rev:
            return  # range is empty

        if self._start_rev is None:
            assert self._end_rev is None
            self._update(revlog, start_rev, end_rev)
        elif not (self._start_rev <= start_rev and end_rev <= self._end_rev):
            # grow the scanned window on whichever side falls short
            if start_rev < self._start_rev:
                self._update(revlog, start_rev, self._start_rev - 1)
            if self._end_rev < end_rev:
                self._update(revlog, self._end_rev + 1, end_rev)

        # record the (possibly widened) scanned window
        if self._start_rev is None:
            assert self._end_rev is None
            self._start_rev = start_rev
            self._end_rev = end_rev
        else:
            self._start_rev = min(self._start_rev, start_rev)
            self._end_rev = max(self._end_rev, end_rev)
        assert self._start_rev <= self._end_rev, (
            self._start_rev,
            self._end_rev,
        )

    def _update(self, revlog, start_rev, end_rev):
        """internal method that actually do update content"""
        # the requested range must not overlap the already-scanned window
        assert self._start_rev is None or (
            start_rev < self._start_rev or start_rev > self._end_rev
        ), (self._start_rev, self._end_rev, start_rev, end_rev)
        assert self._start_rev is None or (
            end_rev < self._start_rev or end_rev > self._end_rev
        ), (self._start_rev, self._end_rev, start_rev, end_rev)
        mapping = self.snapshots
        if hasattr(revlog.index, 'findsnapshots'):
            # fast path provided by the native index implementation
            revlog.index.findsnapshots(mapping, start_rev, end_rev)
        else:
            deltaparent = revlog.deltaparent
            issnapshot = revlog.issnapshot
            for rev in revlog.revs(start_rev, end_rev):
                if issnapshot(rev):
                    mapping[deltaparent(rev)].add(rev)
1310
1311
1311
1312
1312 class deltacomputer:
1313 class deltacomputer:
1313 """object capable of computing delta and finding delta for multiple revision
1314 """object capable of computing delta and finding delta for multiple revision
1314
1315
1315 This object is meant to compute and find multiple delta applied to the same
1316 This object is meant to compute and find multiple delta applied to the same
1316 revlog.
1317 revlog.
1317 """
1318 """
1318
1319
1319 def __init__(
1320 def __init__(
1320 self,
1321 self,
1321 revlog,
1322 revlog,
1322 write_debug=None,
1323 write_debug=None,
1323 debug_search=False,
1324 debug_search=False,
1324 debug_info=None,
1325 debug_info=None,
1325 ):
1326 ):
1326 self.revlog = revlog
1327 self.revlog = revlog
1327 self._write_debug = write_debug
1328 self._write_debug = write_debug
1328 if write_debug is None:
1329 if write_debug is None:
1329 self._debug_search = False
1330 self._debug_search = False
1330 else:
1331 else:
1331 self._debug_search = debug_search
1332 self._debug_search = debug_search
1332 self._debug_info = debug_info
1333 self._debug_info = debug_info
1333 self._snapshot_cache = SnapshotCache()
1334 self._snapshot_cache = SnapshotCache()
1334
1335
1335 @property
1336 @property
1336 def _gather_debug(self):
1337 def _gather_debug(self):
1337 return self._write_debug is not None or self._debug_info is not None
1338 return self._write_debug is not None or self._debug_info is not None
1338
1339
1339 def buildtext(self, revinfo):
1340 def buildtext(self, revinfo):
1340 """Builds a fulltext version of a revision
1341 """Builds a fulltext version of a revision
1341
1342
1342 revinfo: revisioninfo instance that contains all needed info
1343 revinfo: revisioninfo instance that contains all needed info
1343 """
1344 """
1344 btext = revinfo.btext
1345 btext = revinfo.btext
1345 if btext[0] is not None:
1346 if btext[0] is not None:
1346 return btext[0]
1347 return btext[0]
1347
1348
1348 revlog = self.revlog
1349 revlog = self.revlog
1349 cachedelta = revinfo.cachedelta
1350 cachedelta = revinfo.cachedelta
1350 baserev = cachedelta[0]
1351 baserev = cachedelta[0]
1351 delta = cachedelta[1]
1352 delta = cachedelta[1]
1352
1353
1353 fulltext = btext[0] = _textfromdelta(
1354 fulltext = btext[0] = _textfromdelta(
1354 revlog,
1355 revlog,
1355 baserev,
1356 baserev,
1356 delta,
1357 delta,
1357 revinfo.p1,
1358 revinfo.p1,
1358 revinfo.p2,
1359 revinfo.p2,
1359 revinfo.flags,
1360 revinfo.flags,
1360 revinfo.node,
1361 revinfo.node,
1361 )
1362 )
1362 return fulltext
1363 return fulltext
1363
1364
1364 def _builddeltadiff(self, base, revinfo):
1365 def _builddeltadiff(self, base, revinfo):
1365 revlog = self.revlog
1366 revlog = self.revlog
1366 t = self.buildtext(revinfo)
1367 t = self.buildtext(revinfo)
1367 if revlog.iscensored(base):
1368 if revlog.iscensored(base):
1368 # deltas based on a censored revision must replace the
1369 # deltas based on a censored revision must replace the
1369 # full content in one patch, so delta works everywhere
1370 # full content in one patch, so delta works everywhere
1370 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
1371 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
1371 delta = header + t
1372 delta = header + t
1372 else:
1373 else:
1373 ptext = revlog.rawdata(base)
1374 ptext = revlog.rawdata(base)
1374 delta = mdiff.textdiff(ptext, t)
1375 delta = mdiff.textdiff(ptext, t)
1375
1376
1376 return delta
1377 return delta
1377
1378
    def _builddeltainfo(
        self, revinfo, base, target_rev=None, as_snapshot=False
    ):
        """Build a _deltainfo storing `revinfo` as a delta against `base`.

        Returns None when the upper-bound compression estimate shows the
        delta cannot make an acceptable intermediate snapshot.

        as_snapshot: the delta is intended to be stored as an intermediate
        snapshot (only meaningful with sparse-revlog).
        """
        # can we use the cached delta?
        revlog = self.revlog
        chainbase = revlog.chainbase(base)
        if revlog.delta_config.general_delta:
            deltabase = base
        else:
            # without general-delta, a delta can only be stored against
            # the immediately preceding revision
            if target_rev is not None and base != target_rev - 1:
                msg = (
                    b'general delta cannot use delta for something else '
                    b'than `prev`: %d<-%d'
                )
                msg %= (base, target_rev)
                raise error.ProgrammingError(msg)
            deltabase = chainbase
        # initialize before the conditionals so the name is bound on every
        # path (previously caused an unbound-variable crash)
        snapshotdepth = None
        if revlog.delta_config.sparse_revlog and deltabase == nullrev:
            snapshotdepth = 0
        elif revlog.delta_config.sparse_revlog and as_snapshot:
            assert revlog.issnapshot(deltabase)
            # A delta chain should always be one full snapshot,
            # zero or more semi-snapshots, and zero or more deltas
            p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
            if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
                snapshotdepth = len(revlog._deltachain(deltabase)[0])
        delta = None
        if revinfo.cachedelta:
            cachebase = revinfo.cachedelta[0]
            # check if the diff still apply
            currentbase = cachebase
            # walk past empty (zero-length) revisions: an empty delta does
            # not change content, so the cached delta still applies
            while (
                currentbase != nullrev
                and currentbase != base
                and self.revlog.length(currentbase) == 0
            ):
                currentbase = self.revlog.deltaparent(currentbase)
            if self.revlog.delta_config.lazy_delta and currentbase == base:
                delta = revinfo.cachedelta[1]
        if delta is None:
            delta = self._builddeltadiff(base, revinfo)
        if self._debug_search:
            msg = b"DBG-DELTAS-SEARCH: uncompressed-delta-size=%d\n"
            msg %= len(delta)
            self._write_debug(msg)
        # snapshotdept need to be neither None nor 0 level snapshot
        if revlog.delta_config.upper_bound_comp is not None and snapshotdepth:
            lowestrealisticdeltalen = (
                len(delta) // revlog.delta_config.upper_bound_comp
            )
            snapshotlimit = revinfo.textlen >> snapshotdepth
            if self._debug_search:
                msg = b"DBG-DELTAS-SEARCH: projected-lower-size=%d\n"
                msg %= lowestrealisticdeltalen
                self._write_debug(msg)
            # absolute constraint: estimated delta would exceed the size
            # budget for a snapshot at this depth
            if snapshotlimit < lowestrealisticdeltalen:
                if self._debug_search:
                    msg = b"DBG-DELTAS-SEARCH: DISCARDED (snapshot limit)\n"
                    self._write_debug(msg)
                return None
            # relative constraint: estimated delta would be larger than
            # the base it builds upon
            if revlog.length(base) < lowestrealisticdeltalen:
                if self._debug_search:
                    msg = b"DBG-DELTAS-SEARCH: DISCARDED (prev size)\n"
                    self._write_debug(msg)
                return None
        header, data = revlog._inner.compress(delta)
        deltalen = len(header) + len(data)
        offset = revlog.end(len(revlog) - 1)
        # distance: bytes to read from disk to restore this revision
        dist = deltalen + offset - revlog.start(chainbase)
        chainlen, compresseddeltalen = revlog._chaininfo(base)
        chainlen += 1
        compresseddeltalen += deltalen

        return _deltainfo(
            dist,
            deltalen,
            (header, data),
            deltabase,
            chainbase,
            chainlen,
            compresseddeltalen,
            snapshotdepth,
        )
1462
1463
1463 def _fullsnapshotinfo(self, revinfo, curr):
1464 def _fullsnapshotinfo(self, revinfo, curr):
1464 rawtext = self.buildtext(revinfo)
1465 rawtext = self.buildtext(revinfo)
1465 data = self.revlog._inner.compress(rawtext)
1466 data = self.revlog._inner.compress(rawtext)
1466 compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
1467 compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
1467 deltabase = chainbase = curr
1468 deltabase = chainbase = curr
1468 snapshotdepth = 0
1469 snapshotdepth = 0
1469 chainlen = 1
1470 chainlen = 1
1470
1471
1471 return _deltainfo(
1472 return _deltainfo(
1472 dist,
1473 dist,
1473 deltalen,
1474 deltalen,
1474 data,
1475 data,
1475 deltabase,
1476 deltabase,
1476 chainbase,
1477 chainbase,
1477 chainlen,
1478 chainlen,
1478 compresseddeltalen,
1479 compresseddeltalen,
1479 snapshotdepth,
1480 snapshotdepth,
1480 )
1481 )
1481
1482
def finddeltainfo(self, revinfo, excluded_bases=None, target_rev=None):
    """Find an acceptable delta against a candidate revision

    revinfo: information about the revision (instance of _revisioninfo)

    Returns the first acceptable candidate revision, as ordered by
    _candidategroups

    If no suitable deltabase is found, we return delta info for a full
    snapshot.

    `excluded_bases` is an optional set of revision that cannot be used as
    a delta base. Use this to recompute delta suitable in censor or strip
    context.
    """
    if target_rev is None:
        # default to appending at the end of the revlog
        target_rev = len(self.revlog)

    gather_debug = self._gather_debug
    cachedelta = revinfo.cachedelta
    revlog = self.revlog
    # parent revisions are resolved lazily; `None` means "not looked up yet"
    p1r = p2r = None

    if excluded_bases is None:
        excluded_bases = set()

    if gather_debug:
        # collect timing and chain-length statistics for this search
        start = util.timer()
        dbg = self._one_dbg_data()
        dbg['revision'] = target_rev
        p1r = revlog.rev(revinfo.p1)
        p2r = revlog.rev(revinfo.p2)
        if p1r != nullrev:
            p1_chain_len = revlog._chaininfo(p1r)[0]
        else:
            p1_chain_len = -1
        if p2r != nullrev:
            p2_chain_len = revlog._chaininfo(p2r)[0]
        else:
            p2_chain_len = -1
        dbg['p1-chain-len'] = p1_chain_len
        dbg['p2-chain-len'] = p2_chain_len

    # 1) if the revision is empty, no amount of delta can beat it
    #
    # 2) no delta for flag processor revision (see "candelta" for why)
    # not calling candelta since only one revision needs test, also to
    # avoid overhead fetching flags again.
    if not revinfo.textlen or revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
        deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
        if gather_debug:
            end = util.timer()
            dbg['duration'] = end - start
            dbg[
                'delta-base'
            ] = deltainfo.base  # pytype: disable=attribute-error
            dbg['search_round_count'] = 0
            dbg['using-cached-base'] = False
            dbg['delta_try_count'] = 0
            dbg['type'] = b"full"
            dbg['snapshot-depth'] = 0
            self._dbg_process_data(dbg)
        return deltainfo

    deltainfo = None

    # If this source delta are to be forcibly reuse, let us comply early.
    if (
        revlog.delta_config.general_delta
        and revinfo.cachedelta is not None
        and revinfo.cachedelta[2] == DELTA_BASE_REUSE_FORCE
    ):
        base = revinfo.cachedelta[0]
        if base == nullrev:
            # forced base is the null revision: store a full snapshot
            dbg_type = b"full"
            deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
            if gather_debug:
                snapshotdepth = 0
        elif base not in excluded_bases:
            # reuse the incoming delta as-is against the forced base
            delta = revinfo.cachedelta[1]
            header, data = revlog.compress(delta)
            deltalen = len(header) + len(data)
            if gather_debug:
                # chain statistics are only needed for debug output
                offset = revlog.end(len(revlog) - 1)
                chainbase = revlog.chainbase(base)
                distance = deltalen + offset - revlog.start(chainbase)
                chainlen, compresseddeltalen = revlog._chaininfo(base)
                chainlen += 1
                compresseddeltalen += deltalen
                if base == p1r or base == p2r:
                    dbg_type = b"delta"
                    snapshotdepth = None
                elif not revlog.issnapshot(base):
                    snapshotdepth = None
                else:
                    dbg_type = b"snapshot"
                    snapshotdepth = revlog.snapshotdepth(base) + 1
            else:
                distance = None
                chainbase = None
                chainlen = None
                compresseddeltalen = None
                snapshotdepth = None
            deltainfo = _deltainfo(
                distance=distance,
                deltalen=deltalen,
                data=(header, data),
                base=base,
                chainbase=chainbase,
                chainlen=chainlen,
                compresseddeltalen=compresseddeltalen,
                snapshotdepth=snapshotdepth,
            )

        if deltainfo is not None:
            if gather_debug:
                end = util.timer()
                dbg['duration'] = end - start
                dbg[
                    'delta-base'
                ] = deltainfo.base  # pytype: disable=attribute-error
                dbg['search_round_count'] = 0
                dbg['using-cached-base'] = True
                dbg['delta_try_count'] = 0
                # NOTE(review): 'type' is hardcoded to b"full" on this
                # forced-reuse path even when the reused base produced a
                # plain delta or snapshot; the dbg_type values assigned
                # above are never read before this return — confirm
                # whether dbg_type was meant to be used here.
                dbg['type'] = b"full"
                if snapshotdepth is None:
                    dbg['snapshot-depth'] = -1
                else:
                    dbg['snapshot-depth'] = snapshotdepth
                self._dbg_process_data(dbg)
            return deltainfo

    # count the number of different delta we tried (for debug purpose)
    dbg_try_count = 0
    # count the number of "search round" we did. (for debug purpose)
    dbg_try_rounds = 0
    dbg_type = b'unknown'

    if p1r is None:
        # parents were not resolved in the debug block above; do it now
        p1r = revlog.rev(revinfo.p1)
        p2r = revlog.rev(revinfo.p2)

    if self._debug_search:
        msg = b"DBG-DELTAS-SEARCH: SEARCH rev=%d\n"
        msg %= target_rev
        self._write_debug(msg)

    # should we try to build a delta?
    if not (len(self.revlog) and self.revlog._storedeltachains):
        search_cls = _NoDeltaSearch
    elif self.revlog.delta_config.sparse_revlog:
        search_cls = _SparseDeltaSearch
    elif self.revlog.delta_config.general_delta:
        search_cls = _GeneralDeltaSearch
    else:
        # before general delta, there is only one possible delta base
        search_cls = _PrevDeltaSearch

    search = search_cls(
        self.revlog,
        revinfo,
        p1r,
        p2r,
        cachedelta,
        excluded_bases,
        target_rev,
        snapshot_cache=self._snapshot_cache,
    )

    # iterate over successive groups of candidate bases, keeping the best
    # (smallest) acceptable delta found so far in `deltainfo`
    while not search.done:
        current_group = search.current_group
        # current_group can be `None`, but not is search.done is False
        # We add this assert to help pytype
        assert current_group is not None
        candidaterevs = current_group
        dbg_try_rounds += 1
        if self._debug_search:
            prev = None
            if deltainfo is not None:
                prev = deltainfo.base

            if (
                cachedelta is not None
                and len(candidaterevs) == 1
                and cachedelta[0] in candidaterevs
            ):
                round_type = b"cached-delta"
            elif p1r in candidaterevs or p2r in candidaterevs:
                round_type = b"parents"
            elif prev is not None and all(c < prev for c in candidaterevs):
                round_type = b"refine-down"
            elif prev is not None and all(c > prev for c in candidaterevs):
                round_type = b"refine-up"
            else:
                round_type = b"search-down"
            msg = b"DBG-DELTAS-SEARCH: ROUND #%d - %d candidates - %s\n"
            msg %= (dbg_try_rounds, len(candidaterevs), round_type)
            self._write_debug(msg)
        nominateddeltas = []
        if deltainfo is not None:
            if self._debug_search:
                msg = (
                    b"DBG-DELTAS-SEARCH: CONTENDER: rev=%d - length=%d\n"
                )
                msg %= (deltainfo.base, deltainfo.deltalen)
                self._write_debug(msg)
            # if we already found a good delta,
            # challenge it against refined candidates
            nominateddeltas.append(deltainfo)
        for candidaterev in candidaterevs:
            if self._debug_search:
                msg = b"DBG-DELTAS-SEARCH: CANDIDATE: rev=%d\n"
                msg %= candidaterev
                self._write_debug(msg)
                candidate_type = None
                if candidaterev == p1r:
                    candidate_type = b"p1"
                elif candidaterev == p2r:
                    candidate_type = b"p2"
                elif self.revlog.issnapshot(candidaterev):
                    candidate_type = b"snapshot-%d"
                    candidate_type %= self.revlog.snapshotdepth(
                        candidaterev
                    )

                if candidate_type is not None:
                    msg = b"DBG-DELTAS-SEARCH: type=%s\n"
                    msg %= candidate_type
                    self._write_debug(msg)
                msg = b"DBG-DELTAS-SEARCH: size=%d\n"
                msg %= self.revlog.length(candidaterev)
                self._write_debug(msg)
                msg = b"DBG-DELTAS-SEARCH: base=%d\n"
                msg %= self.revlog.deltaparent(candidaterev)
                self._write_debug(msg)

            dbg_try_count += 1

            if self._debug_search:
                delta_start = util.timer()
            candidatedelta = self._builddeltainfo(
                revinfo,
                candidaterev,
                target_rev=target_rev,
                as_snapshot=search.current_stage == _STAGE_SNAPSHOT,
            )
            if self._debug_search:
                delta_end = util.timer()
                msg = b"DBG-DELTAS-SEARCH: delta-search-time=%f\n"
                msg %= delta_end - delta_start
                self._write_debug(msg)
            if candidatedelta is not None:
                if search.is_good_delta_info(candidatedelta):
                    if self._debug_search:
                        msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (GOOD)\n"
                        msg %= candidatedelta.deltalen
                        self._write_debug(msg)
                    nominateddeltas.append(candidatedelta)
                elif self._debug_search:
                    msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (BAD)\n"
                    msg %= candidatedelta.deltalen
                    self._write_debug(msg)
            elif self._debug_search:
                msg = b"DBG-DELTAS-SEARCH: NO-DELTA\n"
                self._write_debug(msg)
        if nominateddeltas:
            # keep the smallest delta among current best and new candidates
            deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
        search.next_group(deltainfo)

    if deltainfo is None:
        # no acceptable delta found anywhere: fall back to a full snapshot
        dbg_type = b"full"
        deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
    elif deltainfo.snapshotdepth:  # pytype: disable=attribute-error
        dbg_type = b"snapshot"
    else:
        dbg_type = b"delta"

    if gather_debug:
        end = util.timer()
        if dbg_type == b'full':
            # "cached" here means the cache told us to store a full
            # revision (base == nullrev) and we never searched
            used_cached = (
                cachedelta is not None
                and dbg_try_rounds == 0
                and dbg_try_count == 0
                and cachedelta[0] == nullrev
            )
        else:
            used_cached = (
                cachedelta is not None
                and dbg_try_rounds == 1
                and dbg_try_count == 1
                and deltainfo.base == cachedelta[0]
            )
        dbg['duration'] = end - start
        dbg[
            'delta-base'
        ] = deltainfo.base  # pytype: disable=attribute-error
        dbg['search_round_count'] = dbg_try_rounds
        dbg['using-cached-base'] = used_cached
        dbg['delta_try_count'] = dbg_try_count
        dbg['type'] = dbg_type
        if (
            deltainfo.snapshotdepth  # pytype: disable=attribute-error
            is not None
        ):
            dbg[
                'snapshot-depth'
            ] = deltainfo.snapshotdepth  # pytype: disable=attribute-error
        else:
            dbg['snapshot-depth'] = -1
        self._dbg_process_data(dbg)
    return deltainfo
1794
1795
def _one_dbg_data(self):
    """Return a fresh debug-statistics dict for one revision.

    All metric slots start out as ``None``; only ``'target-revlog'`` is
    filled in here, with a label derived from the revlog's target kind
    (changelog / manifestlog / filelog, plus the target key when present).
    """
    metric_names = (
        'duration',
        'revision',
        'delta-base',
        'search_round_count',
        'using-cached-base',
        'delta_try_count',
        'type',
        'p1-chain-len',
        'p2-chain-len',
        'snapshot-depth',
        'target-revlog',
    )
    dbg = dict.fromkeys(metric_names)
    target_type = self.revlog.target[0]
    target_key = self.revlog.target[1]
    # default label when the target kind is not one we recognize
    label = b"UNKNOWN"
    if target_type == KIND_CHANGELOG:
        label = b'CHANGELOG:'
    elif target_type == KIND_MANIFESTLOG:
        label = b'MANIFESTLOG:'
        if target_key:
            label += b'%s:' % target_key
    elif target_type == KIND_FILELOG:
        label = b'FILELOG:'
        if target_key:
            label += b'%s:' % target_key
    dbg['target-revlog'] = label
    return dbg
1824
1825
1825 def _dbg_process_data(self, dbg):
1826 def _dbg_process_data(self, dbg):
1826 if self._debug_info is not None:
1827 if self._debug_info is not None:
1827 self._debug_info.append(dbg)
1828 self._debug_info.append(dbg)
1828
1829
1829 if self._write_debug is not None:
1830 if self._write_debug is not None:
1830 msg = (
1831 msg = (
1831 b"DBG-DELTAS:"
1832 b"DBG-DELTAS:"
1832 b" %-12s"
1833 b" %-12s"
1833 b" rev=%d:"
1834 b" rev=%d:"
1834 b" delta-base=%d"
1835 b" delta-base=%d"
1835 b" is-cached=%d"
1836 b" is-cached=%d"
1836 b" - search-rounds=%d"
1837 b" - search-rounds=%d"
1837 b" try-count=%d"
1838 b" try-count=%d"
1838 b" - delta-type=%-6s"
1839 b" - delta-type=%-6s"
1839 b" snap-depth=%d"
1840 b" snap-depth=%d"
1840 b" - p1-chain-length=%d"
1841 b" - p1-chain-length=%d"
1841 b" p2-chain-length=%d"
1842 b" p2-chain-length=%d"
1842 b" - duration=%f"
1843 b" - duration=%f"
1843 b"\n"
1844 b"\n"
1844 )
1845 )
1845 msg %= (
1846 msg %= (
1846 dbg["target-revlog"],
1847 dbg["target-revlog"],
1847 dbg["revision"],
1848 dbg["revision"],
1848 dbg["delta-base"],
1849 dbg["delta-base"],
1849 dbg["using-cached-base"],
1850 dbg["using-cached-base"],
1850 dbg["search_round_count"],
1851 dbg["search_round_count"],
1851 dbg["delta_try_count"],
1852 dbg["delta_try_count"],
1852 dbg["type"],
1853 dbg["type"],
1853 dbg["snapshot-depth"],
1854 dbg["snapshot-depth"],
1854 dbg["p1-chain-len"],
1855 dbg["p1-chain-len"],
1855 dbg["p2-chain-len"],
1856 dbg["p2-chain-len"],
1856 dbg["duration"],
1857 dbg["duration"],
1857 )
1858 )
1858 self._write_debug(msg)
1859 self._write_debug(msg)
1859
1860
1860
1861
def delta_compression(default_compression_header, deltainfo):
    """return (COMPRESSION_MODE, deltainfo)

    used by revlog v2+ format to dispatch between PLAIN and DEFAULT
    compression.
    """
    header, payload = deltainfo.data
    if not header:
        if not payload:
            # no data to store at all... declare them uncompressed
            return COMP_MODE_PLAIN, deltainfo
        marker = payload[0:1]
        if marker == b'\0':
            return COMP_MODE_PLAIN, deltainfo
        if marker == default_compression_header:
            return COMP_MODE_DEFAULT, deltainfo
        return COMP_MODE_INLINE, deltainfo
    if header == b'u':
        # we have a more efficient way to declare uncompressed
        return COMP_MODE_PLAIN, drop_u_compression(deltainfo)
    return COMP_MODE_INLINE, deltainfo
General Comments 0
You need to be logged in to leave comments. Login now