##// END OF EJS Templates
revlog: remove legacy usage of `_srdensitythreshold`...
marmoute -
r51955:8c614fa1 default
parent child Browse files
Show More
@@ -1,1630 +1,1629 b''
1 # revlogdeltas.py - Logic around delta computation for revlog
1 # revlogdeltas.py - Logic around delta computation for revlog
2 #
2 #
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 # Copyright 2018 Octobus <contact@octobus.net>
4 # Copyright 2018 Octobus <contact@octobus.net>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8 """Helper class to compute deltas stored inside revlogs"""
8 """Helper class to compute deltas stored inside revlogs"""
9
9
10
10
11 import collections
11 import collections
12 import struct
12 import struct
13
13
14 # import stuff from node for others to import from revlog
14 # import stuff from node for others to import from revlog
15 from ..node import nullrev
15 from ..node import nullrev
16 from ..i18n import _
16 from ..i18n import _
17
17
18 from .constants import (
18 from .constants import (
19 COMP_MODE_DEFAULT,
19 COMP_MODE_DEFAULT,
20 COMP_MODE_INLINE,
20 COMP_MODE_INLINE,
21 COMP_MODE_PLAIN,
21 COMP_MODE_PLAIN,
22 DELTA_BASE_REUSE_FORCE,
22 DELTA_BASE_REUSE_FORCE,
23 DELTA_BASE_REUSE_NO,
23 DELTA_BASE_REUSE_NO,
24 KIND_CHANGELOG,
24 KIND_CHANGELOG,
25 KIND_FILELOG,
25 KIND_FILELOG,
26 KIND_MANIFESTLOG,
26 KIND_MANIFESTLOG,
27 REVIDX_ISCENSORED,
27 REVIDX_ISCENSORED,
28 REVIDX_RAWTEXT_CHANGING_FLAGS,
28 REVIDX_RAWTEXT_CHANGING_FLAGS,
29 )
29 )
30
30
31 from ..thirdparty import attr
31 from ..thirdparty import attr
32
32
33 from .. import (
33 from .. import (
34 error,
34 error,
35 mdiff,
35 mdiff,
36 util,
36 util,
37 )
37 )
38
38
39 from . import flagutil
39 from . import flagutil
40
40
41 # maximum <delta-chain-data>/<revision-text-length> ratio
41 # maximum <delta-chain-data>/<revision-text-length> ratio
42 LIMIT_DELTA2TEXT = 2
42 LIMIT_DELTA2TEXT = 2
43
43
44
44
class _testrevlog:
    """minimalist fake revlog to use in doctests"""

    def __init__(self, data, density=0.5, mingap=0, snapshot=()):
        """``data`` is a list of revision payload boundaries"""
        from .. import revlog

        self._data = data
        # legacy attribute, still read by helpers in this module
        self._srmingapsize = mingap
        self.data_config = revlog.DataConfig()
        self.data_config.sr_density_threshold = density
        self.data_config.sr_min_gap_size = mingap
        self.delta_config = revlog.DeltaConfig()
        self.feature_config = revlog.FeatureConfig()
        self._snapshot = set(snapshot)
        self.index = None

    def start(self, rev):
        # a revision's payload starts where the previous one ends
        if rev in (nullrev, 0):
            return 0
        return self._data[rev - 1]

    def end(self, rev):
        if rev == nullrev:
            return 0
        return self._data[rev]

    def length(self, rev):
        return self.end(rev) - self.start(rev)

    def __len__(self):
        return len(self._data)

    def issnapshot(self, rev):
        # nullrev always counts as a snapshot
        return rev == nullrev or rev in self._snapshot
85
84
86
85
def slicechunk(revlog, revs, targetsize=None):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `revlog.data_config.sr_density_threshold`. No gap smaller
    than `revlog._srmingapsize` is skipped.

    If `targetsize` is set, no chunk larger than `targetsize` will be yield.
    For consistency with other slicing choice, this limit won't go lower than
    `revlog._srmingapsize`.

    If individual revisions chunk are larger than this limit, they will still
    be raised individually.

    >>> data = [
    ...     5,  #00 (5)
    ...     10, #01 (5)
    ...     12, #02 (2)
    ...     12, #03 (empty)
    ...     27, #04 (15)
    ...     31, #05 (4)
    ...     31, #06 (empty)
    ...     42, #07 (11)
    ...     47, #08 (5)
    ...     47, #09 (empty)
    ...     48, #10 (1)
    ...     51, #11 (3)
    ...     74, #12 (23)
    ...     85, #13 (11)
    ...     86, #14 (1)
    ...     91, #15 (5)
    ... ]
    >>> revlog = _testrevlog(data, snapshot=range(16))

    >>> list(slicechunk(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(slicechunk(revlog, [0, 15]))
    [[0], [15]]
    >>> list(slicechunk(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]

    Slicing with a maximum chunk size
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
    [[0], [11], [13], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
    [[0], [11], [13, 15]]

    Slicing involving nullrev
    >>> list(slicechunk(revlog, [-1, 0, 11, 13, 15], targetsize=20))
    [[-1, 0], [11], [13, 15]]
    >>> list(slicechunk(revlog, [-1, 13, 15], targetsize=5))
    [[-1], [13], [15]]
    """
    if targetsize is not None:
        targetsize = max(targetsize, revlog._srmingapsize)
    # targetsize should not be specified when evaluating delta candidates:
    # * targetsize is used to ensure we stay within specification when reading,
    densityslicing = getattr(revlog.index, 'slicechunktodensity', None)
    if densityslicing is None:
        # no native implementation available, fall back to the python one

        def densityslicing(x, y, z):
            return _slicechunktodensity(revlog, x, y, z)

    density_chunks = densityslicing(
        revs, revlog.data_config.sr_density_threshold, revlog._srmingapsize
    )
    for chunk in density_chunks:
        # enforce the maximum read size on each density-selected group
        yield from _slicechunktosize(revlog, chunk, targetsize)
159
158
160
159
def _slicechunktosize(revlog, revs, targetsize=None):
    """slice revs to match the target size

    This is intended to be used on chunk that density slicing selected by that
    are still too large compared to the read garantee of revlog. This might
    happens when "minimal gap size" interrupted the slicing or when chain are
    built in a way that create large blocks next to each other.

    >>> data = [
    ...     3,  #0 (3)
    ...     5,  #1 (2)
    ...     6,  #2 (1)
    ...     8,  #3 (2)
    ...     8,  #4 (empty)
    ...     11, #5 (3)
    ...     12, #6 (1)
    ...     13, #7 (1)
    ...     14, #8 (1)
    ... ]

    == All snapshots cases ==
    >>> revlog = _testrevlog(data, snapshot=range(9))

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1, 2], [3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]

    == No Snapshot cases ==
    >>> revlog = _testrevlog(data)

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1], [2, 3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [4, 5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]

    == mixed case ==
    >>> revlog = _testrevlog(data, snapshot=[0, 1, 2])
    >>> list(_slicechunktosize(revlog, list(range(9)), 5))
    [[0, 1], [2], [3, 4, 5], [6, 7, 8]]
    """
    assert targetsize is None or 0 <= targetsize
    chunk_start = revlog.start(revs[0])
    data_end = revlog.end(revs[-1])
    if targetsize is None or (data_end - chunk_start) <= targetsize:
        # the whole group already fits under the limit
        yield revs
        return

    first_idx = 0
    stop_idx = 1
    rev_iter = enumerate(revs)
    next(rev_iter)  # the first rev always belongs to the current chunk
    # first step: get snapshots out of the way
    for idx, r in rev_iter:
        span = revlog.end(r) - chunk_start
        is_snapshot = revlog.issnapshot(r)
        if span <= targetsize and is_snapshot:
            stop_idx = idx + 1
        else:
            piece = _trimchunk(revlog, revs, first_idx, stop_idx)
            if piece:
                yield piece
            first_idx = idx
            chunk_start = revlog.start(r)
            stop_idx = idx + 1
        if not is_snapshot:
            break

    # for the others, we use binary slicing to quickly converge toward valid
    # chunks (otherwise, we might end up looking for start/end of many
    # revisions). This logic is not looking for the perfect slicing point, it
    # focuses on quickly converging toward valid chunks.
    rev_count = len(revs)
    while (data_end - chunk_start) > targetsize:
        stop_idx = rev_count
        if rev_count - first_idx <= 1:
            break  # protect against individual chunk larger than limit
        span = revlog.end(revs[stop_idx - 1]) - chunk_start
        while span > targetsize:
            if stop_idx - first_idx <= 1:
                break  # protect against individual chunk larger than limit
            stop_idx -= (stop_idx - first_idx) // 2
            span = revlog.end(revs[stop_idx - 1]) - chunk_start
        piece = _trimchunk(revlog, revs, first_idx, stop_idx)
        if piece:
            yield piece
        first_idx = stop_idx
        chunk_start = revlog.start(revs[first_idx])

    piece = _trimchunk(revlog, revs, first_idx)
    if piece:
        yield piece
314
313
315
314
def _slicechunktodensity(revlog, revs, targetdensity=0.5, mingapsize=0):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
    skipped.

    >>> revlog = _testrevlog([
    ...     5,  #00 (5)
    ...     10, #01 (5)
    ...     12, #02 (2)
    ...     12, #03 (empty)
    ...     27, #04 (15)
    ...     31, #05 (4)
    ...     31, #06 (empty)
    ...     42, #07 (11)
    ...     47, #08 (5)
    ...     47, #09 (empty)
    ...     48, #10 (1)
    ...     51, #11 (3)
    ...     74, #12 (23)
    ...     85, #13 (11)
    ...     86, #14 (1)
    ...     91, #15 (5)
    ... ])

    >>> list(_slicechunktodensity(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(_slicechunktodensity(revlog, [0, 15]))
    [[0], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           mingapsize=20))
    [[1, 2, 3, 5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95))
    [[1, 2], [5], [8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95, mingapsize=12))
    [[1, 2], [5, 8, 10, 11], [14]]
    """
    start = revlog.start
    length = revlog.length

    if len(revs) <= 1:
        # a single revision can never be split further
        yield revs
        return

    chain_span = segmentspan(revlog, revs)

    if chain_span < mingapsize:
        # the whole span is smaller than the smallest gap we would skip
        yield revs
        return

    read_bytes = chain_span
    payload = sum(length(r) for r in revs)

    if chain_span:
        density = payload / float(chain_span)
    else:
        density = 1.0

    if density >= targetdensity:
        yield revs
        return

    # Record every inter-revision gap that is large enough to matter
    gaps = []
    prev_end = None
    for i, rev in enumerate(revs):
        rev_start = start(rev)
        rev_len = length(rev)

        # Skip empty revisions to form larger holes
        if rev_len == 0:
            continue

        if prev_end is not None:
            gap_size = rev_start - prev_end
            # only consider holes that are large enough
            if gap_size > mingapsize:
                gaps.append((gap_size, i))

        prev_end = rev_start + rev_len
    # sort ascending so the largest gaps can be popped off the end
    gaps.sort()

    # Collect the indices of the largest holes until the density is acceptable
    cut_points = []
    while gaps and density < targetdensity:
        gap_size, gap_idx = gaps.pop()

        cut_points.append(gap_idx)

        # re-evaluate the density once this gap is no longer read
        read_bytes -= gap_size
        if read_bytes > 0:
            density = payload / float(read_bytes)
        else:
            density = 1.0
    cut_points.sort()

    # Cut the revs at collected indices
    prev_idx = 0
    for idx in cut_points:
        chunk = _trimchunk(revlog, revs, prev_idx, idx)
        if chunk:
            yield chunk
        prev_idx = idx

    chunk = _trimchunk(revlog, revs, prev_idx)
    if chunk:
        yield chunk
440
439
441
440
442 def _trimchunk(revlog, revs, startidx, endidx=None):
441 def _trimchunk(revlog, revs, startidx, endidx=None):
443 """returns revs[startidx:endidx] without empty trailing revs
442 """returns revs[startidx:endidx] without empty trailing revs
444
443
445 Doctest Setup
444 Doctest Setup
446 >>> revlog = _testrevlog([
445 >>> revlog = _testrevlog([
447 ... 5, #0
446 ... 5, #0
448 ... 10, #1
447 ... 10, #1
449 ... 12, #2
448 ... 12, #2
450 ... 12, #3 (empty)
449 ... 12, #3 (empty)
451 ... 17, #4
450 ... 17, #4
452 ... 21, #5
451 ... 21, #5
453 ... 21, #6 (empty)
452 ... 21, #6 (empty)
454 ... ])
453 ... ])
455
454
456 Contiguous cases:
455 Contiguous cases:
457 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
456 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
458 [0, 1, 2, 3, 4, 5]
457 [0, 1, 2, 3, 4, 5]
459 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
458 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
460 [0, 1, 2, 3, 4]
459 [0, 1, 2, 3, 4]
461 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
460 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
462 [0, 1, 2]
461 [0, 1, 2]
463 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
462 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
464 [2]
463 [2]
465 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
464 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
466 [3, 4, 5]
465 [3, 4, 5]
467 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
466 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
468 [3, 4]
467 [3, 4]
469
468
470 Discontiguous cases:
469 Discontiguous cases:
471 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
470 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
472 [1, 3, 5]
471 [1, 3, 5]
473 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
472 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
474 [1]
473 [1]
475 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
474 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
476 [3, 5]
475 [3, 5]
477 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
476 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
478 [3, 5]
477 [3, 5]
479 """
478 """
480 length = revlog.length
479 length = revlog.length
481
480
482 if endidx is None:
481 if endidx is None:
483 endidx = len(revs)
482 endidx = len(revs)
484
483
485 # If we have a non-emtpy delta candidate, there are nothing to trim
484 # If we have a non-emtpy delta candidate, there are nothing to trim
486 if revs[endidx - 1] < len(revlog):
485 if revs[endidx - 1] < len(revlog):
487 # Trim empty revs at the end, except the very first revision of a chain
486 # Trim empty revs at the end, except the very first revision of a chain
488 while (
487 while (
489 endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0
488 endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0
490 ):
489 ):
491 endidx -= 1
490 endidx -= 1
492
491
493 return revs[startidx:endidx]
492 return revs[startidx:endidx]
494
493
495
494
def segmentspan(revlog, revs):
    """Get the byte span of a segment of revisions

    revs is a sorted array of revision numbers

    >>> revlog = _testrevlog([
    ...     5,  #0
    ...     10, #1
    ...     12, #2
    ...     12, #3 (empty)
    ...     17, #4
    ... ])

    >>> segmentspan(revlog, [0, 1, 2, 3, 4])
    17
    >>> segmentspan(revlog, [0, 4])
    17
    >>> segmentspan(revlog, [3, 4])
    5
    >>> segmentspan(revlog, [1, 2, 3,])
    7
    >>> segmentspan(revlog, [1, 3])
    7
    """
    if not revs:
        # an empty segment covers no bytes
        return 0
    first_offset = revlog.start(revs[0])
    last_offset = revlog.end(revs[-1])
    return last_offset - first_offset
524
523
525
524
def _textfromdelta(revlog, baserev, delta, p1, p2, flags, expectednode):
    """build full text from a (base, delta) pair and other metadata"""
    # special case deltas which replace entire base; no need to decode
    # base revision. this neatly avoids censored bases, which throw when
    # they're decoded.
    header_len = struct.calcsize(b">lll")
    replace_header = mdiff.replacediffheader(
        revlog.rawsize(baserev), len(delta) - header_len
    )
    if delta[:header_len] == replace_header:
        # the delta is a full replacement: its payload IS the new text
        fulltext = delta[header_len:]
    else:
        # deltabase is rawtext before changed by flag processors, which is
        # equivalent to non-raw text
        basetext = revlog.revision(baserev)
        fulltext = mdiff.patch(basetext, delta)

    try:
        validatehash = flagutil.processflagsraw(revlog, fulltext, flags)
        if validatehash:
            revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
        if flags & REVIDX_ISCENSORED:
            raise error.StorageError(
                _(b'node %s is not censored') % expectednode
            )
    except error.CensoredNodeError:
        # must pass the censored index flag to add censored revisions
        if not flags & REVIDX_ISCENSORED:
            raise
    return fulltext
555
554
556
555
@attr.s(slots=True, frozen=True)
class _deltainfo:
    # Candidate delta computed against some base revision, with enough
    # metadata to judge whether storing it would be a good idea
    # (see ``is_good_delta_info``).

    # distance from the base revision -- bounding it limits the amount of
    # I/O needed to restore the text
    distance = attr.ib()
    # size in bytes of the delta payload (including its compression header)
    deltalen = attr.ib()
    # (compression-header, payload) pair; header is b'u' for uncompressed
    # data (see ``drop_u_compression``)
    data = attr.ib()
    # revision number the delta applies on top of
    base = attr.ib()
    # first revision of the delta chain this delta would extend
    chainbase = attr.ib()
    # number of deltas in the resulting chain
    chainlen = attr.ib()
    # sum of the sizes of all deltas in the chain -- bounds the CPU needed
    # to restore the text
    compresseddeltalen = attr.ib()
    # snapshot depth if this delta would create an intermediate snapshot,
    # None otherwise
    snapshotdepth = attr.ib()
567
566
568
567
def drop_u_compression(delta):
    """Return *delta* with its b'u' (no-compression) marker byte removed.

    This is useful for revlog format that has better compression method.
    """
    header, payload = delta.data
    assert header == b'u', header
    return _deltainfo(
        distance=delta.distance,
        deltalen=delta.deltalen - 1,
        data=(b'', payload),
        base=delta.base,
        chainbase=delta.chainbase,
        chainlen=delta.chainlen,
        compresseddeltalen=delta.compresseddeltalen,
        snapshotdepth=delta.snapshotdepth,
    )
585
584
586
585
def is_good_delta_info(revlog, deltainfo, revinfo):
    """Returns True if the given delta is good. Good means that it is within
    the disk span, disk size, and chain length bounds that we know to be
    performant.

    ``deltainfo`` is a ``_deltainfo`` candidate (or None, which is never
    good) and ``revinfo`` describes the revision being stored (its
    ``textlen`` and any ``cachedelta``).
    """
    if deltainfo is None:
        return False

    # the DELTA_BASE_REUSE_FORCE case should have been taken care of sooner so
    # we should never end up asking such question. Adding the assert as a
    # safe-guard to detect anything that would be fishy in this regard.
    assert (
        revinfo.cachedelta is None
        or revinfo.cachedelta[2] != DELTA_BASE_REUSE_FORCE
        or not revlog.delta_config.general_delta
    )

    # - 'deltainfo.distance' is the distance from the base revision --
    #   bounding it limits the amount of I/O we need to do.
    # - 'deltainfo.compresseddeltalen' is the sum of the total size of
    #   deltas we need to apply -- bounding it limits the amount of CPU
    #   we consume.

    textlen = revinfo.textlen
    defaultmax = textlen * 4
    maxdist = revlog.delta_config.max_deltachain_span
    if not maxdist:
        maxdist = deltainfo.distance  # ensure the conditional pass
    maxdist = max(maxdist, defaultmax)

    # Bad delta from read span:
    #
    #   If the span of data read is larger than the maximum allowed.
    #
    #   In the sparse-revlog case, we rely on the associated "sparse reading"
    #   to avoid issue related to the span of data. In theory, it would be
    #   possible to build pathological revlog where delta pattern would lead
    #   to too many reads. However, they do not happen in practice at all. So
    #   we skip the span check entirely.
    if not revlog.delta_config.sparse_revlog and maxdist < deltainfo.distance:
        return False

    # Bad delta from new delta size:
    #
    #   If the delta size is larger than the target text, storing the
    #   delta will be inefficient.
    if textlen < deltainfo.deltalen:
        return False

    # Bad delta from cumulated payload size:
    #
    #   If the sum of delta get larger than K * target text length.
    if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
        return False

    # Bad delta from chain length:
    #
    #   If the number of delta in the chain gets too high.
    if (
        revlog.delta_config.max_chain_len
        and revlog.delta_config.max_chain_len < deltainfo.chainlen
    ):
        return False

    # bad delta from intermediate snapshot size limit
    #
    #   If an intermediate snapshot size is higher than the limit.  The
    #   limit exist to prevent endless chain of intermediate delta to be
    #   created.
    if (
        deltainfo.snapshotdepth is not None
        and (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen
    ):
        return False

    # bad delta if new intermediate snapshot is larger than the previous
    # snapshot
    if (
        deltainfo.snapshotdepth
        and revlog.length(deltainfo.base) < deltainfo.deltalen
    ):
        return False

    return True
670
669
671
670
# If a revision's full text is at least this many times bigger than a base
# candidate's full text, it is very unlikely that it will produce a valid
# delta. We no longer consider these candidates.
LIMIT_BASE2TEXT = 500
676
675
677
676
def _candidategroups(
    revlog,
    textlen,
    p1,
    p2,
    cachedelta,
    excluded_bases=None,
    target_rev=None,
    snapshot_cache=None,
):
    """Provides group of revision to be tested as delta base

    This top level function focus on emitting groups with unique and
    worthwhile content. See ``_rawgroups`` for details about the group order.

    This is a coroutine-style generator: the caller sends back the revision
    it picked as a good delta base from the previous group (or None), and
    receives the next candidate group as a tuple of revisions. A final
    ``None`` is yielded when no more candidates remain.

    ``excluded_bases`` lists revisions that must never be used as a base
    (e.g. censored ones) and ``target_rev`` is the revision number the new
    revision will get (bases at or above it are rejected).
    """
    # should we try to build a delta?
    if not (len(revlog) and revlog._storedeltachains):
        yield None
        return

    if target_rev is None:
        target_rev = len(revlog)

    if not revlog.delta_config.general_delta:
        # before general delta, there is only one possible delta base
        yield (target_rev - 1,)
        yield None
        return

    # the DELTA_BASE_REUSE_FORCE case should have been taken care of sooner so
    # we should never end up asking such question. Adding the assert as a
    # safe-guard to detect anything that would be fishy in this regard.
    assert (
        cachedelta is None
        or cachedelta[2] != DELTA_BASE_REUSE_FORCE
        or not revlog.delta_config.general_delta
    )

    # hoist frequently used bound methods out of the loop
    deltalength = revlog.length
    deltaparent = revlog.deltaparent
    sparse = revlog.delta_config.sparse_revlog
    good = None

    deltas_limit = textlen * LIMIT_DELTA2TEXT
    group_chunk_size = revlog.delta_config.candidate_group_chunk_size

    tested = {nullrev}
    candidates = _refinedgroups(
        revlog,
        p1,
        p2,
        cachedelta,
        snapshot_cache=snapshot_cache,
    )
    while True:
        temptative = candidates.send(good)
        if temptative is None:
            break
        group = []
        for rev in temptative:
            # skip over empty delta (no need to include them in a chain)
            while not (rev == nullrev or rev in tested or deltalength(rev)):
                tested.add(rev)
                rev = deltaparent(rev)
            # no need to try a delta against nullrev, this will be done as a
            # last resort.
            if rev == nullrev:
                continue
            # filter out revision we tested already
            if rev in tested:
                continue

            # a higher authority deemed the base unworthy (e.g. censored)
            if excluded_bases is not None and rev in excluded_bases:
                tested.add(rev)
                continue
            # We are in some recomputation cases and that rev is too high in
            # the revlog
            if target_rev is not None and rev >= target_rev:
                tested.add(rev)
                continue
            # filter out delta base that will never produce good delta
            if deltas_limit < revlog.length(rev):
                tested.add(rev)
                continue
            # base much smaller than the text cannot yield a small delta
            if sparse and revlog.rawsize(rev) < (textlen // LIMIT_BASE2TEXT):
                tested.add(rev)
                continue
            # no delta for rawtext-changing revs (see "candelta" for why)
            if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
                tested.add(rev)
                continue

            # If we reach here, we are about to build and test a delta.
            # The delta building process will compute the chaininfo in all
            # case, since that computation is cached, it is fine to access it
            # here too.
            chainlen, chainsize = revlog._chaininfo(rev)
            # if chain will be too long, skip base
            if (
                revlog.delta_config.max_chain_len
                and chainlen >= revlog.delta_config.max_chain_len
            ):
                tested.add(rev)
                continue
            # if chain already have too much data, skip base
            if deltas_limit < chainsize:
                tested.add(rev)
                continue
            if sparse and revlog.upperboundcomp is not None:
                maxcomp = revlog.upperboundcomp
                basenotsnap = (p1, p2, nullrev)
                if rev not in basenotsnap and revlog.issnapshot(rev):
                    snapshotdepth = revlog.snapshotdepth(rev)
                    # If text is significantly larger than the base, we can
                    # expect the resulting delta to be proportional to the size
                    # difference
                    revsize = revlog.rawsize(rev)
                    rawsizedistance = max(textlen - revsize, 0)
                    # use an estimate of the compression upper bound.
                    lowestrealisticdeltalen = rawsizedistance // maxcomp

                    # check the absolute constraint on the delta size
                    snapshotlimit = textlen >> snapshotdepth
                    if snapshotlimit < lowestrealisticdeltalen:
                        # delta lower bound is larger than accepted upper bound
                        tested.add(rev)
                        continue

                    # check the relative constraint on the delta size
                    revlength = revlog.length(rev)
                    if revlength < lowestrealisticdeltalen:
                        # delta probable lower bound is larger than target base
                        tested.add(rev)
                        continue

            group.append(rev)
        if group:
            # When the size of the candidate group is big, it can result in a
            # quite significant performance impact. To reduce this, we can send
            # them in smaller batches until the new batch does not provide any
            # improvements.
            #
            # This might reduce the overall efficiency of the compression in
            # some corner cases, but that should also prevent very pathological
            # cases from being an issue. (eg. 20 000 candidates).
            #
            # XXX note that the ordering of the group becomes important as it
            # now impacts the final result. The current order is unprocessed
            # and can be improved.
            if group_chunk_size == 0:
                tested.update(group)
                good = yield tuple(group)
            else:
                prev_good = good
                for start in range(0, len(group), group_chunk_size):
                    sub_group = group[start : start + group_chunk_size]
                    tested.update(sub_group)
                    good = yield tuple(sub_group)
                    # stop sending chunks as soon as one fails to improve
                    # on the previously selected base
                    if prev_good == good:
                        break

    yield None
841
840
842
841
def _refinedgroups(revlog, p1, p2, cachedelta, snapshot_cache=None):
    """Emit candidate-base groups, refining around any accepted snapshot.

    Same coroutine protocol as ``_candidategroups``: the caller sends back
    the revision it accepted from the previous group (or None). After the
    raw groups from ``_rawgroups`` are exhausted (or one is accepted), a
    snapshot pick is refined by walking its delta-parents ("down") and its
    known snapshot children ("up"). Ends by yielding None.
    """
    good = None
    # First we try to reuse the delta contained in the bundle.
    # (or from the source revlog)
    #
    # This logic only applies to general delta repositories and can be disabled
    # through configuration. Disabling reuse source delta is useful when
    # we want to make sure we recomputed "optimal" deltas.
    debug_info = None
    if cachedelta is not None and cachedelta[2] > DELTA_BASE_REUSE_NO:
        # Assume what we received from the server is a good choice
        # build delta will reuse the cache
        if debug_info is not None:
            debug_info['cached-delta.tested'] += 1
        good = yield (cachedelta[0],)
        if good is not None:
            if debug_info is not None:
                debug_info['cached-delta.accepted'] += 1
            yield None
            return
    if snapshot_cache is None:
        snapshot_cache = SnapshotCache()
    groups = _rawgroups(
        revlog,
        p1,
        p2,
        cachedelta,
        snapshot_cache,
    )
    for candidates in groups:
        good = yield candidates
        if good is not None:
            break

    # If sparse revlog is enabled, we can try to refine the available deltas
    if not revlog.delta_config.sparse_revlog:
        yield None
        return

    # if we have a refinable value, try to refine it
    if good is not None and good not in (p1, p2) and revlog.issnapshot(good):
        # refine snapshot down: try each delta-parent of the accepted
        # snapshot until the caller stops improving
        previous = None
        while previous != good:
            previous = good
            base = revlog.deltaparent(good)
            if base == nullrev:
                break
            good = yield (base,)
        # refine snapshot up: try known snapshot children of the pick
        if not snapshot_cache.snapshots:
            snapshot_cache.update(revlog, good + 1)
        previous = None
        while good != previous:
            previous = good
            children = tuple(sorted(c for c in snapshot_cache.snapshots[good]))
            good = yield children

    if debug_info is not None:
        if good is None:
            debug_info['no-solution'] += 1

    yield None
906
905
907
906
def _rawgroups(revlog, p1, p2, cachedelta, snapshot_cache=None):
    """Provides group of revision to be tested as delta base

    This lower level function focus on emitting deltas theoretically
    interesting, without looking at any practical details.

    The group order aims at providing fast or small candidates first.
    """
    # Why search for delta base if we cannot use a delta base ?
    assert revlog.delta_config.general_delta
    # also see issue6056
    sparse = revlog.delta_config.sparse_revlog
    curr = len(revlog)
    prev = curr - 1
    # only the list of revisions of a chain, not the chain metadata
    deltachain = lambda rev: revlog._deltachain(rev)[0]

    # exclude already lazy tested base if any
    parents = [p for p in (p1, p2) if p != nullrev]

    if not revlog.delta_config.delta_both_parents and len(parents) == 2:
        parents.sort()
        # To minimize the chance of having to build a fulltext,
        # pick first whichever parent is closest to us (max rev)
        yield (parents[1],)
        # then the other one (min rev) if the first did not fit
        yield (parents[0],)
    elif len(parents) > 0:
        # Test all parents (1 or 2), and keep the best candidate
        yield parents

    if sparse and parents:
        if snapshot_cache is None:
            # map: base-rev: [snapshot-revs]
            snapshot_cache = SnapshotCache()
        # See if we can use an existing snapshot in the parent chains to use as
        # a base for a new intermediate-snapshot
        #
        # search for snapshot in parents delta chain
        # map: snapshot-level: snapshot-rev
        parents_snaps = collections.defaultdict(set)
        candidate_chains = [deltachain(p) for p in parents]
        for chain in candidate_chains:
            for idx, s in enumerate(chain):
                if not revlog.issnapshot(s):
                    break
                parents_snaps[idx].add(s)
        snapfloor = min(parents_snaps[0]) + 1
        snapshot_cache.update(revlog, snapfloor)
        # search for the highest "unrelated" revision
        #
        # Adding snapshots used by "unrelated" revision increase the odd we
        # reuse an independant, yet better snapshot chain.
        #
        # XXX instead of building a set of revisions, we could lazily enumerate
        # over the chains. That would be more efficient, however we stick to
        # simple code for now.
        all_revs = set()
        for chain in candidate_chains:
            all_revs.update(chain)
        other = None
        for r in revlog.revs(prev, snapfloor):
            if r not in all_revs:
                other = r
                break
        if other is not None:
            # To avoid unfair competition, we won't use unrelated intermediate
            # snapshot that are deeper than the ones from the parent delta
            # chain.
            max_depth = max(parents_snaps.keys())
            chain = deltachain(other)
            for depth, s in enumerate(chain):
                if s < snapfloor:
                    continue
                if max_depth < depth:
                    break
                if not revlog.issnapshot(s):
                    break
                parents_snaps[depth].add(s)
        # Test them as possible intermediate snapshot base
        # We test them from highest to lowest level. High level one are more
        # likely to result in small delta
        floor = None
        for idx, snaps in sorted(parents_snaps.items(), reverse=True):
            siblings = set()
            for s in snaps:
                siblings.update(snapshot_cache.snapshots[s])
            # Before considering making a new intermediate snapshot, we check
            # if an existing snapshot, children of base we consider, would be
            # suitable.
            #
            # It gives a chance to reuse a delta chain "unrelated" to the
            # current revision instead of starting our own. Without such
            # re-use, topological branches would keep reopening new chains.
            # Creating more and more snapshot as the repository grow.

            if floor is not None:
                # We only do this for siblings created after the one in our
                # parent's delta chain. Those created before has less chances
                # to be valid base since our ancestors had to create a new
                # snapshot.
                siblings = [r for r in siblings if floor < r]
            yield tuple(sorted(siblings))
            # then test the base from our parent's delta chain.
            yield tuple(sorted(snaps))
            floor = min(snaps)
        # No suitable base found in the parent chain, search if any full
        # snapshots emitted since parent's base would be a suitable base for an
        # intermediate snapshot.
        #
        # It gives a chance to reuse a delta chain unrelated to the current
        # revisions instead of starting our own. Without such re-use,
        # topological branches would keep reopening new full chains. Creating
        # more and more snapshot as the repository grow.
        full = [r for r in snapshot_cache.snapshots[nullrev] if snapfloor <= r]
        yield tuple(sorted(full))

    if not sparse:
        # other approach failed try against prev to hopefully save us a
        # fulltext.
        yield (prev,)
1028
1027
1029
1028
1030 class SnapshotCache:
1029 class SnapshotCache:
1031 __slots__ = ('snapshots', '_start_rev', '_end_rev')
1030 __slots__ = ('snapshots', '_start_rev', '_end_rev')
1032
1031
1033 def __init__(self):
1032 def __init__(self):
1034 self.snapshots = collections.defaultdict(set)
1033 self.snapshots = collections.defaultdict(set)
1035 self._start_rev = None
1034 self._start_rev = None
1036 self._end_rev = None
1035 self._end_rev = None
1037
1036
1038 def update(self, revlog, start_rev=0):
1037 def update(self, revlog, start_rev=0):
1039 """find snapshots from start_rev to tip"""
1038 """find snapshots from start_rev to tip"""
1040 nb_revs = len(revlog)
1039 nb_revs = len(revlog)
1041 end_rev = nb_revs - 1
1040 end_rev = nb_revs - 1
1042 if start_rev > end_rev:
1041 if start_rev > end_rev:
1043 return # range is empty
1042 return # range is empty
1044
1043
1045 if self._start_rev is None:
1044 if self._start_rev is None:
1046 assert self._end_rev is None
1045 assert self._end_rev is None
1047 self._update(revlog, start_rev, end_rev)
1046 self._update(revlog, start_rev, end_rev)
1048 elif not (self._start_rev <= start_rev and end_rev <= self._end_rev):
1047 elif not (self._start_rev <= start_rev and end_rev <= self._end_rev):
1049 if start_rev < self._start_rev:
1048 if start_rev < self._start_rev:
1050 self._update(revlog, start_rev, self._start_rev - 1)
1049 self._update(revlog, start_rev, self._start_rev - 1)
1051 if self._end_rev < end_rev:
1050 if self._end_rev < end_rev:
1052 self._update(revlog, self._end_rev + 1, end_rev)
1051 self._update(revlog, self._end_rev + 1, end_rev)
1053
1052
1054 if self._start_rev is None:
1053 if self._start_rev is None:
1055 assert self._end_rev is None
1054 assert self._end_rev is None
1056 self._end_rev = end_rev
1055 self._end_rev = end_rev
1057 self._start_rev = start_rev
1056 self._start_rev = start_rev
1058 else:
1057 else:
1059 self._start_rev = min(self._start_rev, start_rev)
1058 self._start_rev = min(self._start_rev, start_rev)
1060 self._end_rev = max(self._end_rev, end_rev)
1059 self._end_rev = max(self._end_rev, end_rev)
1061 assert self._start_rev <= self._end_rev, (
1060 assert self._start_rev <= self._end_rev, (
1062 self._start_rev,
1061 self._start_rev,
1063 self._end_rev,
1062 self._end_rev,
1064 )
1063 )
1065
1064
1066 def _update(self, revlog, start_rev, end_rev):
1065 def _update(self, revlog, start_rev, end_rev):
1067 """internal method that actually do update content"""
1066 """internal method that actually do update content"""
1068 assert self._start_rev is None or (
1067 assert self._start_rev is None or (
1069 start_rev < self._start_rev or start_rev > self._end_rev
1068 start_rev < self._start_rev or start_rev > self._end_rev
1070 ), (self._start_rev, self._end_rev, start_rev, end_rev)
1069 ), (self._start_rev, self._end_rev, start_rev, end_rev)
1071 assert self._start_rev is None or (
1070 assert self._start_rev is None or (
1072 end_rev < self._start_rev or end_rev > self._end_rev
1071 end_rev < self._start_rev or end_rev > self._end_rev
1073 ), (self._start_rev, self._end_rev, start_rev, end_rev)
1072 ), (self._start_rev, self._end_rev, start_rev, end_rev)
1074 cache = self.snapshots
1073 cache = self.snapshots
1075 if hasattr(revlog.index, 'findsnapshots'):
1074 if hasattr(revlog.index, 'findsnapshots'):
1076 revlog.index.findsnapshots(cache, start_rev, end_rev)
1075 revlog.index.findsnapshots(cache, start_rev, end_rev)
1077 else:
1076 else:
1078 deltaparent = revlog.deltaparent
1077 deltaparent = revlog.deltaparent
1079 issnapshot = revlog.issnapshot
1078 issnapshot = revlog.issnapshot
1080 for rev in revlog.revs(start_rev, end_rev):
1079 for rev in revlog.revs(start_rev, end_rev):
1081 if issnapshot(rev):
1080 if issnapshot(rev):
1082 cache[deltaparent(rev)].add(rev)
1081 cache[deltaparent(rev)].add(rev)
1083
1082
1084
1083
1085 class deltacomputer:
1084 class deltacomputer:
1086 def __init__(
1085 def __init__(
1087 self,
1086 self,
1088 revlog,
1087 revlog,
1089 write_debug=None,
1088 write_debug=None,
1090 debug_search=False,
1089 debug_search=False,
1091 debug_info=None,
1090 debug_info=None,
1092 ):
1091 ):
1093 self.revlog = revlog
1092 self.revlog = revlog
1094 self._write_debug = write_debug
1093 self._write_debug = write_debug
1095 if write_debug is None:
1094 if write_debug is None:
1096 self._debug_search = False
1095 self._debug_search = False
1097 else:
1096 else:
1098 self._debug_search = debug_search
1097 self._debug_search = debug_search
1099 self._debug_info = debug_info
1098 self._debug_info = debug_info
1100 self._snapshot_cache = SnapshotCache()
1099 self._snapshot_cache = SnapshotCache()
1101
1100
1102 @property
1101 @property
1103 def _gather_debug(self):
1102 def _gather_debug(self):
1104 return self._write_debug is not None or self._debug_info is not None
1103 return self._write_debug is not None or self._debug_info is not None
1105
1104
1106 def buildtext(self, revinfo):
1105 def buildtext(self, revinfo):
1107 """Builds a fulltext version of a revision
1106 """Builds a fulltext version of a revision
1108
1107
1109 revinfo: revisioninfo instance that contains all needed info
1108 revinfo: revisioninfo instance that contains all needed info
1110 """
1109 """
1111 btext = revinfo.btext
1110 btext = revinfo.btext
1112 if btext[0] is not None:
1111 if btext[0] is not None:
1113 return btext[0]
1112 return btext[0]
1114
1113
1115 revlog = self.revlog
1114 revlog = self.revlog
1116 cachedelta = revinfo.cachedelta
1115 cachedelta = revinfo.cachedelta
1117 baserev = cachedelta[0]
1116 baserev = cachedelta[0]
1118 delta = cachedelta[1]
1117 delta = cachedelta[1]
1119
1118
1120 fulltext = btext[0] = _textfromdelta(
1119 fulltext = btext[0] = _textfromdelta(
1121 revlog,
1120 revlog,
1122 baserev,
1121 baserev,
1123 delta,
1122 delta,
1124 revinfo.p1,
1123 revinfo.p1,
1125 revinfo.p2,
1124 revinfo.p2,
1126 revinfo.flags,
1125 revinfo.flags,
1127 revinfo.node,
1126 revinfo.node,
1128 )
1127 )
1129 return fulltext
1128 return fulltext
1130
1129
1131 def _builddeltadiff(self, base, revinfo):
1130 def _builddeltadiff(self, base, revinfo):
1132 revlog = self.revlog
1131 revlog = self.revlog
1133 t = self.buildtext(revinfo)
1132 t = self.buildtext(revinfo)
1134 if revlog.iscensored(base):
1133 if revlog.iscensored(base):
1135 # deltas based on a censored revision must replace the
1134 # deltas based on a censored revision must replace the
1136 # full content in one patch, so delta works everywhere
1135 # full content in one patch, so delta works everywhere
1137 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
1136 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
1138 delta = header + t
1137 delta = header + t
1139 else:
1138 else:
1140 ptext = revlog.rawdata(base)
1139 ptext = revlog.rawdata(base)
1141 delta = mdiff.textdiff(ptext, t)
1140 delta = mdiff.textdiff(ptext, t)
1142
1141
1143 return delta
1142 return delta
1144
1143
1145 def _builddeltainfo(self, revinfo, base, target_rev=None):
1144 def _builddeltainfo(self, revinfo, base, target_rev=None):
1146 # can we use the cached delta?
1145 # can we use the cached delta?
1147 revlog = self.revlog
1146 revlog = self.revlog
1148 chainbase = revlog.chainbase(base)
1147 chainbase = revlog.chainbase(base)
1149 if revlog.delta_config.general_delta:
1148 if revlog.delta_config.general_delta:
1150 deltabase = base
1149 deltabase = base
1151 else:
1150 else:
1152 if target_rev is not None and base != target_rev - 1:
1151 if target_rev is not None and base != target_rev - 1:
1153 msg = (
1152 msg = (
1154 b'general delta cannot use delta for something else '
1153 b'general delta cannot use delta for something else '
1155 b'than `prev`: %d<-%d'
1154 b'than `prev`: %d<-%d'
1156 )
1155 )
1157 msg %= (base, target_rev)
1156 msg %= (base, target_rev)
1158 raise error.ProgrammingError(msg)
1157 raise error.ProgrammingError(msg)
1159 deltabase = chainbase
1158 deltabase = chainbase
1160 snapshotdepth = None
1159 snapshotdepth = None
1161 if revlog.delta_config.sparse_revlog and deltabase == nullrev:
1160 if revlog.delta_config.sparse_revlog and deltabase == nullrev:
1162 snapshotdepth = 0
1161 snapshotdepth = 0
1163 elif revlog.delta_config.sparse_revlog and revlog.issnapshot(deltabase):
1162 elif revlog.delta_config.sparse_revlog and revlog.issnapshot(deltabase):
1164 # A delta chain should always be one full snapshot,
1163 # A delta chain should always be one full snapshot,
1165 # zero or more semi-snapshots, and zero or more deltas
1164 # zero or more semi-snapshots, and zero or more deltas
1166 p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
1165 p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
1167 if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
1166 if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
1168 snapshotdepth = len(revlog._deltachain(deltabase)[0])
1167 snapshotdepth = len(revlog._deltachain(deltabase)[0])
1169 delta = None
1168 delta = None
1170 if revinfo.cachedelta:
1169 if revinfo.cachedelta:
1171 cachebase = revinfo.cachedelta[0]
1170 cachebase = revinfo.cachedelta[0]
1172 # check if the diff still apply
1171 # check if the diff still apply
1173 currentbase = cachebase
1172 currentbase = cachebase
1174 while (
1173 while (
1175 currentbase != nullrev
1174 currentbase != nullrev
1176 and currentbase != base
1175 and currentbase != base
1177 and self.revlog.length(currentbase) == 0
1176 and self.revlog.length(currentbase) == 0
1178 ):
1177 ):
1179 currentbase = self.revlog.deltaparent(currentbase)
1178 currentbase = self.revlog.deltaparent(currentbase)
1180 if self.revlog._lazydelta and currentbase == base:
1179 if self.revlog._lazydelta and currentbase == base:
1181 delta = revinfo.cachedelta[1]
1180 delta = revinfo.cachedelta[1]
1182 if delta is None:
1181 if delta is None:
1183 delta = self._builddeltadiff(base, revinfo)
1182 delta = self._builddeltadiff(base, revinfo)
1184 if self._debug_search:
1183 if self._debug_search:
1185 msg = b"DBG-DELTAS-SEARCH: uncompressed-delta-size=%d\n"
1184 msg = b"DBG-DELTAS-SEARCH: uncompressed-delta-size=%d\n"
1186 msg %= len(delta)
1185 msg %= len(delta)
1187 self._write_debug(msg)
1186 self._write_debug(msg)
1188 # snapshotdept need to be neither None nor 0 level snapshot
1187 # snapshotdept need to be neither None nor 0 level snapshot
1189 if revlog.upperboundcomp is not None and snapshotdepth:
1188 if revlog.upperboundcomp is not None and snapshotdepth:
1190 lowestrealisticdeltalen = len(delta) // revlog.upperboundcomp
1189 lowestrealisticdeltalen = len(delta) // revlog.upperboundcomp
1191 snapshotlimit = revinfo.textlen >> snapshotdepth
1190 snapshotlimit = revinfo.textlen >> snapshotdepth
1192 if self._debug_search:
1191 if self._debug_search:
1193 msg = b"DBG-DELTAS-SEARCH: projected-lower-size=%d\n"
1192 msg = b"DBG-DELTAS-SEARCH: projected-lower-size=%d\n"
1194 msg %= lowestrealisticdeltalen
1193 msg %= lowestrealisticdeltalen
1195 self._write_debug(msg)
1194 self._write_debug(msg)
1196 if snapshotlimit < lowestrealisticdeltalen:
1195 if snapshotlimit < lowestrealisticdeltalen:
1197 if self._debug_search:
1196 if self._debug_search:
1198 msg = b"DBG-DELTAS-SEARCH: DISCARDED (snapshot limit)\n"
1197 msg = b"DBG-DELTAS-SEARCH: DISCARDED (snapshot limit)\n"
1199 self._write_debug(msg)
1198 self._write_debug(msg)
1200 return None
1199 return None
1201 if revlog.length(base) < lowestrealisticdeltalen:
1200 if revlog.length(base) < lowestrealisticdeltalen:
1202 if self._debug_search:
1201 if self._debug_search:
1203 msg = b"DBG-DELTAS-SEARCH: DISCARDED (prev size)\n"
1202 msg = b"DBG-DELTAS-SEARCH: DISCARDED (prev size)\n"
1204 self._write_debug(msg)
1203 self._write_debug(msg)
1205 return None
1204 return None
1206 header, data = revlog.compress(delta)
1205 header, data = revlog.compress(delta)
1207 deltalen = len(header) + len(data)
1206 deltalen = len(header) + len(data)
1208 offset = revlog.end(len(revlog) - 1)
1207 offset = revlog.end(len(revlog) - 1)
1209 dist = deltalen + offset - revlog.start(chainbase)
1208 dist = deltalen + offset - revlog.start(chainbase)
1210 chainlen, compresseddeltalen = revlog._chaininfo(base)
1209 chainlen, compresseddeltalen = revlog._chaininfo(base)
1211 chainlen += 1
1210 chainlen += 1
1212 compresseddeltalen += deltalen
1211 compresseddeltalen += deltalen
1213
1212
1214 return _deltainfo(
1213 return _deltainfo(
1215 dist,
1214 dist,
1216 deltalen,
1215 deltalen,
1217 (header, data),
1216 (header, data),
1218 deltabase,
1217 deltabase,
1219 chainbase,
1218 chainbase,
1220 chainlen,
1219 chainlen,
1221 compresseddeltalen,
1220 compresseddeltalen,
1222 snapshotdepth,
1221 snapshotdepth,
1223 )
1222 )
1224
1223
1225 def _fullsnapshotinfo(self, revinfo, curr):
1224 def _fullsnapshotinfo(self, revinfo, curr):
1226 rawtext = self.buildtext(revinfo)
1225 rawtext = self.buildtext(revinfo)
1227 data = self.revlog.compress(rawtext)
1226 data = self.revlog.compress(rawtext)
1228 compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
1227 compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
1229 deltabase = chainbase = curr
1228 deltabase = chainbase = curr
1230 snapshotdepth = 0
1229 snapshotdepth = 0
1231 chainlen = 1
1230 chainlen = 1
1232
1231
1233 return _deltainfo(
1232 return _deltainfo(
1234 dist,
1233 dist,
1235 deltalen,
1234 deltalen,
1236 data,
1235 data,
1237 deltabase,
1236 deltabase,
1238 chainbase,
1237 chainbase,
1239 chainlen,
1238 chainlen,
1240 compresseddeltalen,
1239 compresseddeltalen,
1241 snapshotdepth,
1240 snapshotdepth,
1242 )
1241 )
1243
1242
1244 def finddeltainfo(self, revinfo, excluded_bases=None, target_rev=None):
1243 def finddeltainfo(self, revinfo, excluded_bases=None, target_rev=None):
1245 """Find an acceptable delta against a candidate revision
1244 """Find an acceptable delta against a candidate revision
1246
1245
1247 revinfo: information about the revision (instance of _revisioninfo)
1246 revinfo: information about the revision (instance of _revisioninfo)
1248
1247
1249 Returns the first acceptable candidate revision, as ordered by
1248 Returns the first acceptable candidate revision, as ordered by
1250 _candidategroups
1249 _candidategroups
1251
1250
1252 If no suitable deltabase is found, we return delta info for a full
1251 If no suitable deltabase is found, we return delta info for a full
1253 snapshot.
1252 snapshot.
1254
1253
1255 `excluded_bases` is an optional set of revision that cannot be used as
1254 `excluded_bases` is an optional set of revision that cannot be used as
1256 a delta base. Use this to recompute delta suitable in censor or strip
1255 a delta base. Use this to recompute delta suitable in censor or strip
1257 context.
1256 context.
1258 """
1257 """
1259 if target_rev is None:
1258 if target_rev is None:
1260 target_rev = len(self.revlog)
1259 target_rev = len(self.revlog)
1261
1260
1262 gather_debug = self._gather_debug
1261 gather_debug = self._gather_debug
1263 cachedelta = revinfo.cachedelta
1262 cachedelta = revinfo.cachedelta
1264 revlog = self.revlog
1263 revlog = self.revlog
1265 p1r = p2r = None
1264 p1r = p2r = None
1266
1265
1267 if excluded_bases is None:
1266 if excluded_bases is None:
1268 excluded_bases = set()
1267 excluded_bases = set()
1269
1268
1270 if gather_debug:
1269 if gather_debug:
1271 start = util.timer()
1270 start = util.timer()
1272 dbg = self._one_dbg_data()
1271 dbg = self._one_dbg_data()
1273 dbg['revision'] = target_rev
1272 dbg['revision'] = target_rev
1274 target_revlog = b"UNKNOWN"
1273 target_revlog = b"UNKNOWN"
1275 target_type = self.revlog.target[0]
1274 target_type = self.revlog.target[0]
1276 target_key = self.revlog.target[1]
1275 target_key = self.revlog.target[1]
1277 if target_type == KIND_CHANGELOG:
1276 if target_type == KIND_CHANGELOG:
1278 target_revlog = b'CHANGELOG:'
1277 target_revlog = b'CHANGELOG:'
1279 elif target_type == KIND_MANIFESTLOG:
1278 elif target_type == KIND_MANIFESTLOG:
1280 target_revlog = b'MANIFESTLOG:'
1279 target_revlog = b'MANIFESTLOG:'
1281 if target_key:
1280 if target_key:
1282 target_revlog += b'%s:' % target_key
1281 target_revlog += b'%s:' % target_key
1283 elif target_type == KIND_FILELOG:
1282 elif target_type == KIND_FILELOG:
1284 target_revlog = b'FILELOG:'
1283 target_revlog = b'FILELOG:'
1285 if target_key:
1284 if target_key:
1286 target_revlog += b'%s:' % target_key
1285 target_revlog += b'%s:' % target_key
1287 dbg['target-revlog'] = target_revlog
1286 dbg['target-revlog'] = target_revlog
1288 p1r = revlog.rev(revinfo.p1)
1287 p1r = revlog.rev(revinfo.p1)
1289 p2r = revlog.rev(revinfo.p2)
1288 p2r = revlog.rev(revinfo.p2)
1290 if p1r != nullrev:
1289 if p1r != nullrev:
1291 p1_chain_len = revlog._chaininfo(p1r)[0]
1290 p1_chain_len = revlog._chaininfo(p1r)[0]
1292 else:
1291 else:
1293 p1_chain_len = -1
1292 p1_chain_len = -1
1294 if p2r != nullrev:
1293 if p2r != nullrev:
1295 p2_chain_len = revlog._chaininfo(p2r)[0]
1294 p2_chain_len = revlog._chaininfo(p2r)[0]
1296 else:
1295 else:
1297 p2_chain_len = -1
1296 p2_chain_len = -1
1298 dbg['p1-chain-len'] = p1_chain_len
1297 dbg['p1-chain-len'] = p1_chain_len
1299 dbg['p2-chain-len'] = p2_chain_len
1298 dbg['p2-chain-len'] = p2_chain_len
1300
1299
1301 # 1) if the revision is empty, no amount of delta can beat it
1300 # 1) if the revision is empty, no amount of delta can beat it
1302 #
1301 #
1303 # 2) no delta for flag processor revision (see "candelta" for why)
1302 # 2) no delta for flag processor revision (see "candelta" for why)
1304 # not calling candelta since only one revision needs test, also to
1303 # not calling candelta since only one revision needs test, also to
1305 # avoid overhead fetching flags again.
1304 # avoid overhead fetching flags again.
1306 if not revinfo.textlen or revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
1305 if not revinfo.textlen or revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
1307 deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
1306 deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
1308 if gather_debug:
1307 if gather_debug:
1309 end = util.timer()
1308 end = util.timer()
1310 dbg['duration'] = end - start
1309 dbg['duration'] = end - start
1311 dbg[
1310 dbg[
1312 'delta-base'
1311 'delta-base'
1313 ] = deltainfo.base # pytype: disable=attribute-error
1312 ] = deltainfo.base # pytype: disable=attribute-error
1314 dbg['search_round_count'] = 0
1313 dbg['search_round_count'] = 0
1315 dbg['using-cached-base'] = False
1314 dbg['using-cached-base'] = False
1316 dbg['delta_try_count'] = 0
1315 dbg['delta_try_count'] = 0
1317 dbg['type'] = b"full"
1316 dbg['type'] = b"full"
1318 dbg['snapshot-depth'] = 0
1317 dbg['snapshot-depth'] = 0
1319 self._dbg_process_data(dbg)
1318 self._dbg_process_data(dbg)
1320 return deltainfo
1319 return deltainfo
1321
1320
1322 deltainfo = None
1321 deltainfo = None
1323
1322
1324 # If this source delta are to be forcibly reuse, let us comply early.
1323 # If this source delta are to be forcibly reuse, let us comply early.
1325 if (
1324 if (
1326 revlog.delta_config.general_delta
1325 revlog.delta_config.general_delta
1327 and revinfo.cachedelta is not None
1326 and revinfo.cachedelta is not None
1328 and revinfo.cachedelta[2] == DELTA_BASE_REUSE_FORCE
1327 and revinfo.cachedelta[2] == DELTA_BASE_REUSE_FORCE
1329 ):
1328 ):
1330 base = revinfo.cachedelta[0]
1329 base = revinfo.cachedelta[0]
1331 if base == nullrev:
1330 if base == nullrev:
1332 dbg_type = b"full"
1331 dbg_type = b"full"
1333 deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
1332 deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
1334 if gather_debug:
1333 if gather_debug:
1335 snapshotdepth = 0
1334 snapshotdepth = 0
1336 elif base not in excluded_bases:
1335 elif base not in excluded_bases:
1337 delta = revinfo.cachedelta[1]
1336 delta = revinfo.cachedelta[1]
1338 header, data = revlog.compress(delta)
1337 header, data = revlog.compress(delta)
1339 deltalen = len(header) + len(data)
1338 deltalen = len(header) + len(data)
1340 if gather_debug:
1339 if gather_debug:
1341 offset = revlog.end(len(revlog) - 1)
1340 offset = revlog.end(len(revlog) - 1)
1342 chainbase = revlog.chainbase(base)
1341 chainbase = revlog.chainbase(base)
1343 distance = deltalen + offset - revlog.start(chainbase)
1342 distance = deltalen + offset - revlog.start(chainbase)
1344 chainlen, compresseddeltalen = revlog._chaininfo(base)
1343 chainlen, compresseddeltalen = revlog._chaininfo(base)
1345 chainlen += 1
1344 chainlen += 1
1346 compresseddeltalen += deltalen
1345 compresseddeltalen += deltalen
1347 if base == p1r or base == p2r:
1346 if base == p1r or base == p2r:
1348 dbg_type = b"delta"
1347 dbg_type = b"delta"
1349 snapshotdepth = None
1348 snapshotdepth = None
1350 elif not revlog.issnapshot(base):
1349 elif not revlog.issnapshot(base):
1351 snapshotdepth = None
1350 snapshotdepth = None
1352 else:
1351 else:
1353 dbg_type = b"snapshot"
1352 dbg_type = b"snapshot"
1354 snapshotdepth = revlog.snapshotdepth(base) + 1
1353 snapshotdepth = revlog.snapshotdepth(base) + 1
1355 else:
1354 else:
1356 distance = None
1355 distance = None
1357 chainbase = None
1356 chainbase = None
1358 chainlen = None
1357 chainlen = None
1359 compresseddeltalen = None
1358 compresseddeltalen = None
1360 snapshotdepth = None
1359 snapshotdepth = None
1361 deltainfo = _deltainfo(
1360 deltainfo = _deltainfo(
1362 distance=distance,
1361 distance=distance,
1363 deltalen=deltalen,
1362 deltalen=deltalen,
1364 data=(header, data),
1363 data=(header, data),
1365 base=base,
1364 base=base,
1366 chainbase=chainbase,
1365 chainbase=chainbase,
1367 chainlen=chainlen,
1366 chainlen=chainlen,
1368 compresseddeltalen=compresseddeltalen,
1367 compresseddeltalen=compresseddeltalen,
1369 snapshotdepth=snapshotdepth,
1368 snapshotdepth=snapshotdepth,
1370 )
1369 )
1371
1370
1372 if deltainfo is not None:
1371 if deltainfo is not None:
1373 if gather_debug:
1372 if gather_debug:
1374 end = util.timer()
1373 end = util.timer()
1375 dbg['duration'] = end - start
1374 dbg['duration'] = end - start
1376 dbg[
1375 dbg[
1377 'delta-base'
1376 'delta-base'
1378 ] = deltainfo.base # pytype: disable=attribute-error
1377 ] = deltainfo.base # pytype: disable=attribute-error
1379 dbg['search_round_count'] = 0
1378 dbg['search_round_count'] = 0
1380 dbg['using-cached-base'] = True
1379 dbg['using-cached-base'] = True
1381 dbg['delta_try_count'] = 0
1380 dbg['delta_try_count'] = 0
1382 dbg['type'] = b"full"
1381 dbg['type'] = b"full"
1383 if snapshotdepth is None:
1382 if snapshotdepth is None:
1384 dbg['snapshot-depth'] = 0
1383 dbg['snapshot-depth'] = 0
1385 else:
1384 else:
1386 dbg['snapshot-depth'] = snapshotdepth
1385 dbg['snapshot-depth'] = snapshotdepth
1387 self._dbg_process_data(dbg)
1386 self._dbg_process_data(dbg)
1388 return deltainfo
1387 return deltainfo
1389
1388
1390 # count the number of different delta we tried (for debug purpose)
1389 # count the number of different delta we tried (for debug purpose)
1391 dbg_try_count = 0
1390 dbg_try_count = 0
1392 # count the number of "search round" we did. (for debug purpose)
1391 # count the number of "search round" we did. (for debug purpose)
1393 dbg_try_rounds = 0
1392 dbg_try_rounds = 0
1394 dbg_type = b'unknown'
1393 dbg_type = b'unknown'
1395
1394
1396 if p1r is None:
1395 if p1r is None:
1397 p1r = revlog.rev(revinfo.p1)
1396 p1r = revlog.rev(revinfo.p1)
1398 p2r = revlog.rev(revinfo.p2)
1397 p2r = revlog.rev(revinfo.p2)
1399
1398
1400 if self._debug_search:
1399 if self._debug_search:
1401 msg = b"DBG-DELTAS-SEARCH: SEARCH rev=%d\n"
1400 msg = b"DBG-DELTAS-SEARCH: SEARCH rev=%d\n"
1402 msg %= target_rev
1401 msg %= target_rev
1403 self._write_debug(msg)
1402 self._write_debug(msg)
1404
1403
1405 groups = _candidategroups(
1404 groups = _candidategroups(
1406 self.revlog,
1405 self.revlog,
1407 revinfo.textlen,
1406 revinfo.textlen,
1408 p1r,
1407 p1r,
1409 p2r,
1408 p2r,
1410 cachedelta,
1409 cachedelta,
1411 excluded_bases,
1410 excluded_bases,
1412 target_rev,
1411 target_rev,
1413 snapshot_cache=self._snapshot_cache,
1412 snapshot_cache=self._snapshot_cache,
1414 )
1413 )
1415 candidaterevs = next(groups)
1414 candidaterevs = next(groups)
1416 while candidaterevs is not None:
1415 while candidaterevs is not None:
1417 dbg_try_rounds += 1
1416 dbg_try_rounds += 1
1418 if self._debug_search:
1417 if self._debug_search:
1419 prev = None
1418 prev = None
1420 if deltainfo is not None:
1419 if deltainfo is not None:
1421 prev = deltainfo.base
1420 prev = deltainfo.base
1422
1421
1423 if (
1422 if (
1424 cachedelta is not None
1423 cachedelta is not None
1425 and len(candidaterevs) == 1
1424 and len(candidaterevs) == 1
1426 and cachedelta[0] in candidaterevs
1425 and cachedelta[0] in candidaterevs
1427 ):
1426 ):
1428 round_type = b"cached-delta"
1427 round_type = b"cached-delta"
1429 elif p1r in candidaterevs or p2r in candidaterevs:
1428 elif p1r in candidaterevs or p2r in candidaterevs:
1430 round_type = b"parents"
1429 round_type = b"parents"
1431 elif prev is not None and all(c < prev for c in candidaterevs):
1430 elif prev is not None and all(c < prev for c in candidaterevs):
1432 round_type = b"refine-down"
1431 round_type = b"refine-down"
1433 elif prev is not None and all(c > prev for c in candidaterevs):
1432 elif prev is not None and all(c > prev for c in candidaterevs):
1434 round_type = b"refine-up"
1433 round_type = b"refine-up"
1435 else:
1434 else:
1436 round_type = b"search-down"
1435 round_type = b"search-down"
1437 msg = b"DBG-DELTAS-SEARCH: ROUND #%d - %d candidates - %s\n"
1436 msg = b"DBG-DELTAS-SEARCH: ROUND #%d - %d candidates - %s\n"
1438 msg %= (dbg_try_rounds, len(candidaterevs), round_type)
1437 msg %= (dbg_try_rounds, len(candidaterevs), round_type)
1439 self._write_debug(msg)
1438 self._write_debug(msg)
1440 nominateddeltas = []
1439 nominateddeltas = []
1441 if deltainfo is not None:
1440 if deltainfo is not None:
1442 if self._debug_search:
1441 if self._debug_search:
1443 msg = (
1442 msg = (
1444 b"DBG-DELTAS-SEARCH: CONTENDER: rev=%d - length=%d\n"
1443 b"DBG-DELTAS-SEARCH: CONTENDER: rev=%d - length=%d\n"
1445 )
1444 )
1446 msg %= (deltainfo.base, deltainfo.deltalen)
1445 msg %= (deltainfo.base, deltainfo.deltalen)
1447 self._write_debug(msg)
1446 self._write_debug(msg)
1448 # if we already found a good delta,
1447 # if we already found a good delta,
1449 # challenge it against refined candidates
1448 # challenge it against refined candidates
1450 nominateddeltas.append(deltainfo)
1449 nominateddeltas.append(deltainfo)
1451 for candidaterev in candidaterevs:
1450 for candidaterev in candidaterevs:
1452 if self._debug_search:
1451 if self._debug_search:
1453 msg = b"DBG-DELTAS-SEARCH: CANDIDATE: rev=%d\n"
1452 msg = b"DBG-DELTAS-SEARCH: CANDIDATE: rev=%d\n"
1454 msg %= candidaterev
1453 msg %= candidaterev
1455 self._write_debug(msg)
1454 self._write_debug(msg)
1456 candidate_type = None
1455 candidate_type = None
1457 if candidaterev == p1r:
1456 if candidaterev == p1r:
1458 candidate_type = b"p1"
1457 candidate_type = b"p1"
1459 elif candidaterev == p2r:
1458 elif candidaterev == p2r:
1460 candidate_type = b"p2"
1459 candidate_type = b"p2"
1461 elif self.revlog.issnapshot(candidaterev):
1460 elif self.revlog.issnapshot(candidaterev):
1462 candidate_type = b"snapshot-%d"
1461 candidate_type = b"snapshot-%d"
1463 candidate_type %= self.revlog.snapshotdepth(
1462 candidate_type %= self.revlog.snapshotdepth(
1464 candidaterev
1463 candidaterev
1465 )
1464 )
1466
1465
1467 if candidate_type is not None:
1466 if candidate_type is not None:
1468 msg = b"DBG-DELTAS-SEARCH: type=%s\n"
1467 msg = b"DBG-DELTAS-SEARCH: type=%s\n"
1469 msg %= candidate_type
1468 msg %= candidate_type
1470 self._write_debug(msg)
1469 self._write_debug(msg)
1471 msg = b"DBG-DELTAS-SEARCH: size=%d\n"
1470 msg = b"DBG-DELTAS-SEARCH: size=%d\n"
1472 msg %= self.revlog.length(candidaterev)
1471 msg %= self.revlog.length(candidaterev)
1473 self._write_debug(msg)
1472 self._write_debug(msg)
1474 msg = b"DBG-DELTAS-SEARCH: base=%d\n"
1473 msg = b"DBG-DELTAS-SEARCH: base=%d\n"
1475 msg %= self.revlog.deltaparent(candidaterev)
1474 msg %= self.revlog.deltaparent(candidaterev)
1476 self._write_debug(msg)
1475 self._write_debug(msg)
1477
1476
1478 dbg_try_count += 1
1477 dbg_try_count += 1
1479
1478
1480 if self._debug_search:
1479 if self._debug_search:
1481 delta_start = util.timer()
1480 delta_start = util.timer()
1482 candidatedelta = self._builddeltainfo(
1481 candidatedelta = self._builddeltainfo(
1483 revinfo,
1482 revinfo,
1484 candidaterev,
1483 candidaterev,
1485 target_rev=target_rev,
1484 target_rev=target_rev,
1486 )
1485 )
1487 if self._debug_search:
1486 if self._debug_search:
1488 delta_end = util.timer()
1487 delta_end = util.timer()
1489 msg = b"DBG-DELTAS-SEARCH: delta-search-time=%f\n"
1488 msg = b"DBG-DELTAS-SEARCH: delta-search-time=%f\n"
1490 msg %= delta_end - delta_start
1489 msg %= delta_end - delta_start
1491 self._write_debug(msg)
1490 self._write_debug(msg)
1492 if candidatedelta is not None:
1491 if candidatedelta is not None:
1493 if is_good_delta_info(self.revlog, candidatedelta, revinfo):
1492 if is_good_delta_info(self.revlog, candidatedelta, revinfo):
1494 if self._debug_search:
1493 if self._debug_search:
1495 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (GOOD)\n"
1494 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (GOOD)\n"
1496 msg %= candidatedelta.deltalen
1495 msg %= candidatedelta.deltalen
1497 self._write_debug(msg)
1496 self._write_debug(msg)
1498 nominateddeltas.append(candidatedelta)
1497 nominateddeltas.append(candidatedelta)
1499 elif self._debug_search:
1498 elif self._debug_search:
1500 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (BAD)\n"
1499 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (BAD)\n"
1501 msg %= candidatedelta.deltalen
1500 msg %= candidatedelta.deltalen
1502 self._write_debug(msg)
1501 self._write_debug(msg)
1503 elif self._debug_search:
1502 elif self._debug_search:
1504 msg = b"DBG-DELTAS-SEARCH: NO-DELTA\n"
1503 msg = b"DBG-DELTAS-SEARCH: NO-DELTA\n"
1505 self._write_debug(msg)
1504 self._write_debug(msg)
1506 if nominateddeltas:
1505 if nominateddeltas:
1507 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
1506 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
1508 if deltainfo is not None:
1507 if deltainfo is not None:
1509 candidaterevs = groups.send(deltainfo.base)
1508 candidaterevs = groups.send(deltainfo.base)
1510 else:
1509 else:
1511 candidaterevs = next(groups)
1510 candidaterevs = next(groups)
1512
1511
1513 if deltainfo is None:
1512 if deltainfo is None:
1514 dbg_type = b"full"
1513 dbg_type = b"full"
1515 deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
1514 deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
1516 elif deltainfo.snapshotdepth: # pytype: disable=attribute-error
1515 elif deltainfo.snapshotdepth: # pytype: disable=attribute-error
1517 dbg_type = b"snapshot"
1516 dbg_type = b"snapshot"
1518 else:
1517 else:
1519 dbg_type = b"delta"
1518 dbg_type = b"delta"
1520
1519
1521 if gather_debug:
1520 if gather_debug:
1522 end = util.timer()
1521 end = util.timer()
1523 if dbg_type == b'full':
1522 if dbg_type == b'full':
1524 used_cached = (
1523 used_cached = (
1525 cachedelta is not None
1524 cachedelta is not None
1526 and dbg_try_rounds == 0
1525 and dbg_try_rounds == 0
1527 and dbg_try_count == 0
1526 and dbg_try_count == 0
1528 and cachedelta[0] == nullrev
1527 and cachedelta[0] == nullrev
1529 )
1528 )
1530 else:
1529 else:
1531 used_cached = (
1530 used_cached = (
1532 cachedelta is not None
1531 cachedelta is not None
1533 and dbg_try_rounds == 1
1532 and dbg_try_rounds == 1
1534 and dbg_try_count == 1
1533 and dbg_try_count == 1
1535 and deltainfo.base == cachedelta[0]
1534 and deltainfo.base == cachedelta[0]
1536 )
1535 )
1537 dbg['duration'] = end - start
1536 dbg['duration'] = end - start
1538 dbg[
1537 dbg[
1539 'delta-base'
1538 'delta-base'
1540 ] = deltainfo.base # pytype: disable=attribute-error
1539 ] = deltainfo.base # pytype: disable=attribute-error
1541 dbg['search_round_count'] = dbg_try_rounds
1540 dbg['search_round_count'] = dbg_try_rounds
1542 dbg['using-cached-base'] = used_cached
1541 dbg['using-cached-base'] = used_cached
1543 dbg['delta_try_count'] = dbg_try_count
1542 dbg['delta_try_count'] = dbg_try_count
1544 dbg['type'] = dbg_type
1543 dbg['type'] = dbg_type
1545 if (
1544 if (
1546 deltainfo.snapshotdepth # pytype: disable=attribute-error
1545 deltainfo.snapshotdepth # pytype: disable=attribute-error
1547 is not None
1546 is not None
1548 ):
1547 ):
1549 dbg[
1548 dbg[
1550 'snapshot-depth'
1549 'snapshot-depth'
1551 ] = deltainfo.snapshotdepth # pytype: disable=attribute-error
1550 ] = deltainfo.snapshotdepth # pytype: disable=attribute-error
1552 else:
1551 else:
1553 dbg['snapshot-depth'] = 0
1552 dbg['snapshot-depth'] = 0
1554 self._dbg_process_data(dbg)
1553 self._dbg_process_data(dbg)
1555 return deltainfo
1554 return deltainfo
1556
1555
1557 def _one_dbg_data(self):
1556 def _one_dbg_data(self):
1558 return {
1557 return {
1559 'duration': None,
1558 'duration': None,
1560 'revision': None,
1559 'revision': None,
1561 'delta-base': None,
1560 'delta-base': None,
1562 'search_round_count': None,
1561 'search_round_count': None,
1563 'using-cached-base': None,
1562 'using-cached-base': None,
1564 'delta_try_count': None,
1563 'delta_try_count': None,
1565 'type': None,
1564 'type': None,
1566 'p1-chain-len': None,
1565 'p1-chain-len': None,
1567 'p2-chain-len': None,
1566 'p2-chain-len': None,
1568 'snapshot-depth': None,
1567 'snapshot-depth': None,
1569 'target-revlog': None,
1568 'target-revlog': None,
1570 }
1569 }
1571
1570
1572 def _dbg_process_data(self, dbg):
1571 def _dbg_process_data(self, dbg):
1573 if self._debug_info is not None:
1572 if self._debug_info is not None:
1574 self._debug_info.append(dbg)
1573 self._debug_info.append(dbg)
1575
1574
1576 if self._write_debug is not None:
1575 if self._write_debug is not None:
1577 msg = (
1576 msg = (
1578 b"DBG-DELTAS:"
1577 b"DBG-DELTAS:"
1579 b" %-12s"
1578 b" %-12s"
1580 b" rev=%d:"
1579 b" rev=%d:"
1581 b" delta-base=%d"
1580 b" delta-base=%d"
1582 b" is-cached=%d"
1581 b" is-cached=%d"
1583 b" - search-rounds=%d"
1582 b" - search-rounds=%d"
1584 b" try-count=%d"
1583 b" try-count=%d"
1585 b" - delta-type=%-6s"
1584 b" - delta-type=%-6s"
1586 b" snap-depth=%d"
1585 b" snap-depth=%d"
1587 b" - p1-chain-length=%d"
1586 b" - p1-chain-length=%d"
1588 b" p2-chain-length=%d"
1587 b" p2-chain-length=%d"
1589 b" - duration=%f"
1588 b" - duration=%f"
1590 b"\n"
1589 b"\n"
1591 )
1590 )
1592 msg %= (
1591 msg %= (
1593 dbg["target-revlog"],
1592 dbg["target-revlog"],
1594 dbg["revision"],
1593 dbg["revision"],
1595 dbg["delta-base"],
1594 dbg["delta-base"],
1596 dbg["using-cached-base"],
1595 dbg["using-cached-base"],
1597 dbg["search_round_count"],
1596 dbg["search_round_count"],
1598 dbg["delta_try_count"],
1597 dbg["delta_try_count"],
1599 dbg["type"],
1598 dbg["type"],
1600 dbg["snapshot-depth"],
1599 dbg["snapshot-depth"],
1601 dbg["p1-chain-len"],
1600 dbg["p1-chain-len"],
1602 dbg["p2-chain-len"],
1601 dbg["p2-chain-len"],
1603 dbg["duration"],
1602 dbg["duration"],
1604 )
1603 )
1605 self._write_debug(msg)
1604 self._write_debug(msg)
1606
1605
1607
1606
1608 def delta_compression(default_compression_header, deltainfo):
1607 def delta_compression(default_compression_header, deltainfo):
1609 """return (COMPRESSION_MODE, deltainfo)
1608 """return (COMPRESSION_MODE, deltainfo)
1610
1609
1611 used by revlog v2+ format to dispatch between PLAIN and DEFAULT
1610 used by revlog v2+ format to dispatch between PLAIN and DEFAULT
1612 compression.
1611 compression.
1613 """
1612 """
1614 h, d = deltainfo.data
1613 h, d = deltainfo.data
1615 compression_mode = COMP_MODE_INLINE
1614 compression_mode = COMP_MODE_INLINE
1616 if not h and not d:
1615 if not h and not d:
1617 # not data to store at all... declare them uncompressed
1616 # not data to store at all... declare them uncompressed
1618 compression_mode = COMP_MODE_PLAIN
1617 compression_mode = COMP_MODE_PLAIN
1619 elif not h:
1618 elif not h:
1620 t = d[0:1]
1619 t = d[0:1]
1621 if t == b'\0':
1620 if t == b'\0':
1622 compression_mode = COMP_MODE_PLAIN
1621 compression_mode = COMP_MODE_PLAIN
1623 elif t == default_compression_header:
1622 elif t == default_compression_header:
1624 compression_mode = COMP_MODE_DEFAULT
1623 compression_mode = COMP_MODE_DEFAULT
1625 elif h == b'u':
1624 elif h == b'u':
1626 # we have a more efficient way to declare uncompressed
1625 # we have a more efficient way to declare uncompressed
1627 h = b''
1626 h = b''
1628 compression_mode = COMP_MODE_PLAIN
1627 compression_mode = COMP_MODE_PLAIN
1629 deltainfo = drop_u_compression(deltainfo)
1628 deltainfo = drop_u_compression(deltainfo)
1630 return compression_mode, deltainfo
1629 return compression_mode, deltainfo
General Comments 0
You need to be logged in to leave comments. Login now