##// END OF EJS Templates
revlog: remove legacy usage of `_candidate_group_chunk_size`...
marmoute -
r51948:fa7d307e default
parent child Browse files
Show More
@@ -1,1630 +1,1630 b''
1 # revlogdeltas.py - Logic around delta computation for revlog
1 # revlogdeltas.py - Logic around delta computation for revlog
2 #
2 #
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 # Copyright 2018 Octobus <contact@octobus.net>
4 # Copyright 2018 Octobus <contact@octobus.net>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8 """Helper class to compute deltas stored inside revlogs"""
8 """Helper class to compute deltas stored inside revlogs"""
9
9
10
10
11 import collections
11 import collections
12 import struct
12 import struct
13
13
14 # import stuff from node for others to import from revlog
14 # import stuff from node for others to import from revlog
15 from ..node import nullrev
15 from ..node import nullrev
16 from ..i18n import _
16 from ..i18n import _
17
17
18 from .constants import (
18 from .constants import (
19 COMP_MODE_DEFAULT,
19 COMP_MODE_DEFAULT,
20 COMP_MODE_INLINE,
20 COMP_MODE_INLINE,
21 COMP_MODE_PLAIN,
21 COMP_MODE_PLAIN,
22 DELTA_BASE_REUSE_FORCE,
22 DELTA_BASE_REUSE_FORCE,
23 DELTA_BASE_REUSE_NO,
23 DELTA_BASE_REUSE_NO,
24 KIND_CHANGELOG,
24 KIND_CHANGELOG,
25 KIND_FILELOG,
25 KIND_FILELOG,
26 KIND_MANIFESTLOG,
26 KIND_MANIFESTLOG,
27 REVIDX_ISCENSORED,
27 REVIDX_ISCENSORED,
28 REVIDX_RAWTEXT_CHANGING_FLAGS,
28 REVIDX_RAWTEXT_CHANGING_FLAGS,
29 )
29 )
30
30
31 from ..thirdparty import attr
31 from ..thirdparty import attr
32
32
33 from .. import (
33 from .. import (
34 error,
34 error,
35 mdiff,
35 mdiff,
36 util,
36 util,
37 )
37 )
38
38
39 from . import flagutil
39 from . import flagutil
40
40
41 # maximum <delta-chain-data>/<revision-text-length> ratio
41 # maximum <delta-chain-data>/<revision-text-length> ratio
42 LIMIT_DELTA2TEXT = 2
42 LIMIT_DELTA2TEXT = 2
43
43
44
44
45 class _testrevlog:
45 class _testrevlog:
46 """minimalist fake revlog to use in doctests"""
46 """minimalist fake revlog to use in doctests"""
47
47
48 def __init__(self, data, density=0.5, mingap=0, snapshot=()):
48 def __init__(self, data, density=0.5, mingap=0, snapshot=()):
49 """data is an list of revision payload boundaries"""
49 """data is an list of revision payload boundaries"""
50 from .. import revlog
50 from .. import revlog
51
51
52 self._data = data
52 self._data = data
53 self._srdensitythreshold = density
53 self._srdensitythreshold = density
54 self._srmingapsize = mingap
54 self._srmingapsize = mingap
55 self.data_config = revlog.DataConfig()
55 self.data_config = revlog.DataConfig()
56 self.data_config.sr_density_threshold = density
56 self.data_config.sr_density_threshold = density
57 self.data_config.sr_min_gap_size = mingap
57 self.data_config.sr_min_gap_size = mingap
58 self.delta_config = revlog.DeltaConfig()
58 self.delta_config = revlog.DeltaConfig()
59 self.feature_config = revlog.FeatureConfig()
59 self.feature_config = revlog.FeatureConfig()
60 self._snapshot = set(snapshot)
60 self._snapshot = set(snapshot)
61 self.index = None
61 self.index = None
62
62
63 def start(self, rev):
63 def start(self, rev):
64 if rev == nullrev:
64 if rev == nullrev:
65 return 0
65 return 0
66 if rev == 0:
66 if rev == 0:
67 return 0
67 return 0
68 return self._data[rev - 1]
68 return self._data[rev - 1]
69
69
70 def end(self, rev):
70 def end(self, rev):
71 if rev == nullrev:
71 if rev == nullrev:
72 return 0
72 return 0
73 return self._data[rev]
73 return self._data[rev]
74
74
75 def length(self, rev):
75 def length(self, rev):
76 return self.end(rev) - self.start(rev)
76 return self.end(rev) - self.start(rev)
77
77
78 def __len__(self):
78 def __len__(self):
79 return len(self._data)
79 return len(self._data)
80
80
81 def issnapshot(self, rev):
81 def issnapshot(self, rev):
82 if rev == nullrev:
82 if rev == nullrev:
83 return True
83 return True
84 return rev in self._snapshot
84 return rev in self._snapshot
85
85
86
86
87 def slicechunk(revlog, revs, targetsize=None):
87 def slicechunk(revlog, revs, targetsize=None):
88 """slice revs to reduce the amount of unrelated data to be read from disk.
88 """slice revs to reduce the amount of unrelated data to be read from disk.
89
89
90 ``revs`` is sliced into groups that should be read in one time.
90 ``revs`` is sliced into groups that should be read in one time.
91 Assume that revs are sorted.
91 Assume that revs are sorted.
92
92
93 The initial chunk is sliced until the overall density (payload/chunks-span
93 The initial chunk is sliced until the overall density (payload/chunks-span
94 ratio) is above `revlog._srdensitythreshold`. No gap smaller than
94 ratio) is above `revlog._srdensitythreshold`. No gap smaller than
95 `revlog._srmingapsize` is skipped.
95 `revlog._srmingapsize` is skipped.
96
96
97 If `targetsize` is set, no chunk larger than `targetsize` will be yield.
97 If `targetsize` is set, no chunk larger than `targetsize` will be yield.
98 For consistency with other slicing choice, this limit won't go lower than
98 For consistency with other slicing choice, this limit won't go lower than
99 `revlog._srmingapsize`.
99 `revlog._srmingapsize`.
100
100
101 If individual revisions chunk are larger than this limit, they will still
101 If individual revisions chunk are larger than this limit, they will still
102 be raised individually.
102 be raised individually.
103
103
104 >>> data = [
104 >>> data = [
105 ... 5, #00 (5)
105 ... 5, #00 (5)
106 ... 10, #01 (5)
106 ... 10, #01 (5)
107 ... 12, #02 (2)
107 ... 12, #02 (2)
108 ... 12, #03 (empty)
108 ... 12, #03 (empty)
109 ... 27, #04 (15)
109 ... 27, #04 (15)
110 ... 31, #05 (4)
110 ... 31, #05 (4)
111 ... 31, #06 (empty)
111 ... 31, #06 (empty)
112 ... 42, #07 (11)
112 ... 42, #07 (11)
113 ... 47, #08 (5)
113 ... 47, #08 (5)
114 ... 47, #09 (empty)
114 ... 47, #09 (empty)
115 ... 48, #10 (1)
115 ... 48, #10 (1)
116 ... 51, #11 (3)
116 ... 51, #11 (3)
117 ... 74, #12 (23)
117 ... 74, #12 (23)
118 ... 85, #13 (11)
118 ... 85, #13 (11)
119 ... 86, #14 (1)
119 ... 86, #14 (1)
120 ... 91, #15 (5)
120 ... 91, #15 (5)
121 ... ]
121 ... ]
122 >>> revlog = _testrevlog(data, snapshot=range(16))
122 >>> revlog = _testrevlog(data, snapshot=range(16))
123
123
124 >>> list(slicechunk(revlog, list(range(16))))
124 >>> list(slicechunk(revlog, list(range(16))))
125 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
125 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
126 >>> list(slicechunk(revlog, [0, 15]))
126 >>> list(slicechunk(revlog, [0, 15]))
127 [[0], [15]]
127 [[0], [15]]
128 >>> list(slicechunk(revlog, [0, 11, 15]))
128 >>> list(slicechunk(revlog, [0, 11, 15]))
129 [[0], [11], [15]]
129 [[0], [11], [15]]
130 >>> list(slicechunk(revlog, [0, 11, 13, 15]))
130 >>> list(slicechunk(revlog, [0, 11, 13, 15]))
131 [[0], [11, 13, 15]]
131 [[0], [11, 13, 15]]
132 >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
132 >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
133 [[1, 2], [5, 8, 10, 11], [14]]
133 [[1, 2], [5, 8, 10, 11], [14]]
134
134
135 Slicing with a maximum chunk size
135 Slicing with a maximum chunk size
136 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
136 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
137 [[0], [11], [13], [15]]
137 [[0], [11], [13], [15]]
138 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
138 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
139 [[0], [11], [13, 15]]
139 [[0], [11], [13, 15]]
140
140
141 Slicing involving nullrev
141 Slicing involving nullrev
142 >>> list(slicechunk(revlog, [-1, 0, 11, 13, 15], targetsize=20))
142 >>> list(slicechunk(revlog, [-1, 0, 11, 13, 15], targetsize=20))
143 [[-1, 0], [11], [13, 15]]
143 [[-1, 0], [11], [13, 15]]
144 >>> list(slicechunk(revlog, [-1, 13, 15], targetsize=5))
144 >>> list(slicechunk(revlog, [-1, 13, 15], targetsize=5))
145 [[-1], [13], [15]]
145 [[-1], [13], [15]]
146 """
146 """
147 if targetsize is not None:
147 if targetsize is not None:
148 targetsize = max(targetsize, revlog._srmingapsize)
148 targetsize = max(targetsize, revlog._srmingapsize)
149 # targetsize should not be specified when evaluating delta candidates:
149 # targetsize should not be specified when evaluating delta candidates:
150 # * targetsize is used to ensure we stay within specification when reading,
150 # * targetsize is used to ensure we stay within specification when reading,
151 densityslicing = getattr(revlog.index, 'slicechunktodensity', None)
151 densityslicing = getattr(revlog.index, 'slicechunktodensity', None)
152 if densityslicing is None:
152 if densityslicing is None:
153 densityslicing = lambda x, y, z: _slicechunktodensity(revlog, x, y, z)
153 densityslicing = lambda x, y, z: _slicechunktodensity(revlog, x, y, z)
154 for chunk in densityslicing(
154 for chunk in densityslicing(
155 revs, revlog._srdensitythreshold, revlog._srmingapsize
155 revs, revlog._srdensitythreshold, revlog._srmingapsize
156 ):
156 ):
157 for subchunk in _slicechunktosize(revlog, chunk, targetsize):
157 for subchunk in _slicechunktosize(revlog, chunk, targetsize):
158 yield subchunk
158 yield subchunk
159
159
160
160
161 def _slicechunktosize(revlog, revs, targetsize=None):
161 def _slicechunktosize(revlog, revs, targetsize=None):
162 """slice revs to match the target size
162 """slice revs to match the target size
163
163
164 This is intended to be used on chunk that density slicing selected by that
164 This is intended to be used on chunk that density slicing selected by that
165 are still too large compared to the read garantee of revlog. This might
165 are still too large compared to the read garantee of revlog. This might
166 happens when "minimal gap size" interrupted the slicing or when chain are
166 happens when "minimal gap size" interrupted the slicing or when chain are
167 built in a way that create large blocks next to each other.
167 built in a way that create large blocks next to each other.
168
168
169 >>> data = [
169 >>> data = [
170 ... 3, #0 (3)
170 ... 3, #0 (3)
171 ... 5, #1 (2)
171 ... 5, #1 (2)
172 ... 6, #2 (1)
172 ... 6, #2 (1)
173 ... 8, #3 (2)
173 ... 8, #3 (2)
174 ... 8, #4 (empty)
174 ... 8, #4 (empty)
175 ... 11, #5 (3)
175 ... 11, #5 (3)
176 ... 12, #6 (1)
176 ... 12, #6 (1)
177 ... 13, #7 (1)
177 ... 13, #7 (1)
178 ... 14, #8 (1)
178 ... 14, #8 (1)
179 ... ]
179 ... ]
180
180
181 == All snapshots cases ==
181 == All snapshots cases ==
182 >>> revlog = _testrevlog(data, snapshot=range(9))
182 >>> revlog = _testrevlog(data, snapshot=range(9))
183
183
184 Cases where chunk is already small enough
184 Cases where chunk is already small enough
185 >>> list(_slicechunktosize(revlog, [0], 3))
185 >>> list(_slicechunktosize(revlog, [0], 3))
186 [[0]]
186 [[0]]
187 >>> list(_slicechunktosize(revlog, [6, 7], 3))
187 >>> list(_slicechunktosize(revlog, [6, 7], 3))
188 [[6, 7]]
188 [[6, 7]]
189 >>> list(_slicechunktosize(revlog, [0], None))
189 >>> list(_slicechunktosize(revlog, [0], None))
190 [[0]]
190 [[0]]
191 >>> list(_slicechunktosize(revlog, [6, 7], None))
191 >>> list(_slicechunktosize(revlog, [6, 7], None))
192 [[6, 7]]
192 [[6, 7]]
193
193
194 cases where we need actual slicing
194 cases where we need actual slicing
195 >>> list(_slicechunktosize(revlog, [0, 1], 3))
195 >>> list(_slicechunktosize(revlog, [0, 1], 3))
196 [[0], [1]]
196 [[0], [1]]
197 >>> list(_slicechunktosize(revlog, [1, 3], 3))
197 >>> list(_slicechunktosize(revlog, [1, 3], 3))
198 [[1], [3]]
198 [[1], [3]]
199 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
199 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
200 [[1, 2], [3]]
200 [[1, 2], [3]]
201 >>> list(_slicechunktosize(revlog, [3, 5], 3))
201 >>> list(_slicechunktosize(revlog, [3, 5], 3))
202 [[3], [5]]
202 [[3], [5]]
203 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
203 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
204 [[3], [5]]
204 [[3], [5]]
205 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
205 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
206 [[5], [6, 7, 8]]
206 [[5], [6, 7, 8]]
207 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
207 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
208 [[0], [1, 2], [3], [5], [6, 7, 8]]
208 [[0], [1, 2], [3], [5], [6, 7, 8]]
209
209
210 Case with too large individual chunk (must return valid chunk)
210 Case with too large individual chunk (must return valid chunk)
211 >>> list(_slicechunktosize(revlog, [0, 1], 2))
211 >>> list(_slicechunktosize(revlog, [0, 1], 2))
212 [[0], [1]]
212 [[0], [1]]
213 >>> list(_slicechunktosize(revlog, [1, 3], 1))
213 >>> list(_slicechunktosize(revlog, [1, 3], 1))
214 [[1], [3]]
214 [[1], [3]]
215 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
215 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
216 [[3], [5]]
216 [[3], [5]]
217
217
218 == No Snapshot cases ==
218 == No Snapshot cases ==
219 >>> revlog = _testrevlog(data)
219 >>> revlog = _testrevlog(data)
220
220
221 Cases where chunk is already small enough
221 Cases where chunk is already small enough
222 >>> list(_slicechunktosize(revlog, [0], 3))
222 >>> list(_slicechunktosize(revlog, [0], 3))
223 [[0]]
223 [[0]]
224 >>> list(_slicechunktosize(revlog, [6, 7], 3))
224 >>> list(_slicechunktosize(revlog, [6, 7], 3))
225 [[6, 7]]
225 [[6, 7]]
226 >>> list(_slicechunktosize(revlog, [0], None))
226 >>> list(_slicechunktosize(revlog, [0], None))
227 [[0]]
227 [[0]]
228 >>> list(_slicechunktosize(revlog, [6, 7], None))
228 >>> list(_slicechunktosize(revlog, [6, 7], None))
229 [[6, 7]]
229 [[6, 7]]
230
230
231 cases where we need actual slicing
231 cases where we need actual slicing
232 >>> list(_slicechunktosize(revlog, [0, 1], 3))
232 >>> list(_slicechunktosize(revlog, [0, 1], 3))
233 [[0], [1]]
233 [[0], [1]]
234 >>> list(_slicechunktosize(revlog, [1, 3], 3))
234 >>> list(_slicechunktosize(revlog, [1, 3], 3))
235 [[1], [3]]
235 [[1], [3]]
236 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
236 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
237 [[1], [2, 3]]
237 [[1], [2, 3]]
238 >>> list(_slicechunktosize(revlog, [3, 5], 3))
238 >>> list(_slicechunktosize(revlog, [3, 5], 3))
239 [[3], [5]]
239 [[3], [5]]
240 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
240 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
241 [[3], [4, 5]]
241 [[3], [4, 5]]
242 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
242 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
243 [[5], [6, 7, 8]]
243 [[5], [6, 7, 8]]
244 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
244 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
245 [[0], [1, 2], [3], [5], [6, 7, 8]]
245 [[0], [1, 2], [3], [5], [6, 7, 8]]
246
246
247 Case with too large individual chunk (must return valid chunk)
247 Case with too large individual chunk (must return valid chunk)
248 >>> list(_slicechunktosize(revlog, [0, 1], 2))
248 >>> list(_slicechunktosize(revlog, [0, 1], 2))
249 [[0], [1]]
249 [[0], [1]]
250 >>> list(_slicechunktosize(revlog, [1, 3], 1))
250 >>> list(_slicechunktosize(revlog, [1, 3], 1))
251 [[1], [3]]
251 [[1], [3]]
252 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
252 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
253 [[3], [5]]
253 [[3], [5]]
254
254
255 == mixed case ==
255 == mixed case ==
256 >>> revlog = _testrevlog(data, snapshot=[0, 1, 2])
256 >>> revlog = _testrevlog(data, snapshot=[0, 1, 2])
257 >>> list(_slicechunktosize(revlog, list(range(9)), 5))
257 >>> list(_slicechunktosize(revlog, list(range(9)), 5))
258 [[0, 1], [2], [3, 4, 5], [6, 7, 8]]
258 [[0, 1], [2], [3, 4, 5], [6, 7, 8]]
259 """
259 """
260 assert targetsize is None or 0 <= targetsize
260 assert targetsize is None or 0 <= targetsize
261 startdata = revlog.start(revs[0])
261 startdata = revlog.start(revs[0])
262 enddata = revlog.end(revs[-1])
262 enddata = revlog.end(revs[-1])
263 fullspan = enddata - startdata
263 fullspan = enddata - startdata
264 if targetsize is None or fullspan <= targetsize:
264 if targetsize is None or fullspan <= targetsize:
265 yield revs
265 yield revs
266 return
266 return
267
267
268 startrevidx = 0
268 startrevidx = 0
269 endrevidx = 1
269 endrevidx = 1
270 iterrevs = enumerate(revs)
270 iterrevs = enumerate(revs)
271 next(iterrevs) # skip first rev.
271 next(iterrevs) # skip first rev.
272 # first step: get snapshots out of the way
272 # first step: get snapshots out of the way
273 for idx, r in iterrevs:
273 for idx, r in iterrevs:
274 span = revlog.end(r) - startdata
274 span = revlog.end(r) - startdata
275 snapshot = revlog.issnapshot(r)
275 snapshot = revlog.issnapshot(r)
276 if span <= targetsize and snapshot:
276 if span <= targetsize and snapshot:
277 endrevidx = idx + 1
277 endrevidx = idx + 1
278 else:
278 else:
279 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
279 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
280 if chunk:
280 if chunk:
281 yield chunk
281 yield chunk
282 startrevidx = idx
282 startrevidx = idx
283 startdata = revlog.start(r)
283 startdata = revlog.start(r)
284 endrevidx = idx + 1
284 endrevidx = idx + 1
285 if not snapshot:
285 if not snapshot:
286 break
286 break
287
287
288 # for the others, we use binary slicing to quickly converge toward valid
288 # for the others, we use binary slicing to quickly converge toward valid
289 # chunks (otherwise, we might end up looking for start/end of many
289 # chunks (otherwise, we might end up looking for start/end of many
290 # revisions). This logic is not looking for the perfect slicing point, it
290 # revisions). This logic is not looking for the perfect slicing point, it
291 # focuses on quickly converging toward valid chunks.
291 # focuses on quickly converging toward valid chunks.
292 nbitem = len(revs)
292 nbitem = len(revs)
293 while (enddata - startdata) > targetsize:
293 while (enddata - startdata) > targetsize:
294 endrevidx = nbitem
294 endrevidx = nbitem
295 if nbitem - startrevidx <= 1:
295 if nbitem - startrevidx <= 1:
296 break # protect against individual chunk larger than limit
296 break # protect against individual chunk larger than limit
297 localenddata = revlog.end(revs[endrevidx - 1])
297 localenddata = revlog.end(revs[endrevidx - 1])
298 span = localenddata - startdata
298 span = localenddata - startdata
299 while span > targetsize:
299 while span > targetsize:
300 if endrevidx - startrevidx <= 1:
300 if endrevidx - startrevidx <= 1:
301 break # protect against individual chunk larger than limit
301 break # protect against individual chunk larger than limit
302 endrevidx -= (endrevidx - startrevidx) // 2
302 endrevidx -= (endrevidx - startrevidx) // 2
303 localenddata = revlog.end(revs[endrevidx - 1])
303 localenddata = revlog.end(revs[endrevidx - 1])
304 span = localenddata - startdata
304 span = localenddata - startdata
305 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
305 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
306 if chunk:
306 if chunk:
307 yield chunk
307 yield chunk
308 startrevidx = endrevidx
308 startrevidx = endrevidx
309 startdata = revlog.start(revs[startrevidx])
309 startdata = revlog.start(revs[startrevidx])
310
310
311 chunk = _trimchunk(revlog, revs, startrevidx)
311 chunk = _trimchunk(revlog, revs, startrevidx)
312 if chunk:
312 if chunk:
313 yield chunk
313 yield chunk
314
314
315
315
316 def _slicechunktodensity(revlog, revs, targetdensity=0.5, mingapsize=0):
316 def _slicechunktodensity(revlog, revs, targetdensity=0.5, mingapsize=0):
317 """slice revs to reduce the amount of unrelated data to be read from disk.
317 """slice revs to reduce the amount of unrelated data to be read from disk.
318
318
319 ``revs`` is sliced into groups that should be read in one time.
319 ``revs`` is sliced into groups that should be read in one time.
320 Assume that revs are sorted.
320 Assume that revs are sorted.
321
321
322 The initial chunk is sliced until the overall density (payload/chunks-span
322 The initial chunk is sliced until the overall density (payload/chunks-span
323 ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
323 ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
324 skipped.
324 skipped.
325
325
326 >>> revlog = _testrevlog([
326 >>> revlog = _testrevlog([
327 ... 5, #00 (5)
327 ... 5, #00 (5)
328 ... 10, #01 (5)
328 ... 10, #01 (5)
329 ... 12, #02 (2)
329 ... 12, #02 (2)
330 ... 12, #03 (empty)
330 ... 12, #03 (empty)
331 ... 27, #04 (15)
331 ... 27, #04 (15)
332 ... 31, #05 (4)
332 ... 31, #05 (4)
333 ... 31, #06 (empty)
333 ... 31, #06 (empty)
334 ... 42, #07 (11)
334 ... 42, #07 (11)
335 ... 47, #08 (5)
335 ... 47, #08 (5)
336 ... 47, #09 (empty)
336 ... 47, #09 (empty)
337 ... 48, #10 (1)
337 ... 48, #10 (1)
338 ... 51, #11 (3)
338 ... 51, #11 (3)
339 ... 74, #12 (23)
339 ... 74, #12 (23)
340 ... 85, #13 (11)
340 ... 85, #13 (11)
341 ... 86, #14 (1)
341 ... 86, #14 (1)
342 ... 91, #15 (5)
342 ... 91, #15 (5)
343 ... ])
343 ... ])
344
344
345 >>> list(_slicechunktodensity(revlog, list(range(16))))
345 >>> list(_slicechunktodensity(revlog, list(range(16))))
346 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
346 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
347 >>> list(_slicechunktodensity(revlog, [0, 15]))
347 >>> list(_slicechunktodensity(revlog, [0, 15]))
348 [[0], [15]]
348 [[0], [15]]
349 >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
349 >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
350 [[0], [11], [15]]
350 [[0], [11], [15]]
351 >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
351 >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
352 [[0], [11, 13, 15]]
352 [[0], [11, 13, 15]]
353 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
353 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
354 [[1, 2], [5, 8, 10, 11], [14]]
354 [[1, 2], [5, 8, 10, 11], [14]]
355 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
355 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
356 ... mingapsize=20))
356 ... mingapsize=20))
357 [[1, 2, 3, 5, 8, 10, 11], [14]]
357 [[1, 2, 3, 5, 8, 10, 11], [14]]
358 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
358 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
359 ... targetdensity=0.95))
359 ... targetdensity=0.95))
360 [[1, 2], [5], [8, 10, 11], [14]]
360 [[1, 2], [5], [8, 10, 11], [14]]
361 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
361 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
362 ... targetdensity=0.95, mingapsize=12))
362 ... targetdensity=0.95, mingapsize=12))
363 [[1, 2], [5, 8, 10, 11], [14]]
363 [[1, 2], [5, 8, 10, 11], [14]]
364 """
364 """
365 start = revlog.start
365 start = revlog.start
366 length = revlog.length
366 length = revlog.length
367
367
368 if len(revs) <= 1:
368 if len(revs) <= 1:
369 yield revs
369 yield revs
370 return
370 return
371
371
372 deltachainspan = segmentspan(revlog, revs)
372 deltachainspan = segmentspan(revlog, revs)
373
373
374 if deltachainspan < mingapsize:
374 if deltachainspan < mingapsize:
375 yield revs
375 yield revs
376 return
376 return
377
377
378 readdata = deltachainspan
378 readdata = deltachainspan
379 chainpayload = sum(length(r) for r in revs)
379 chainpayload = sum(length(r) for r in revs)
380
380
381 if deltachainspan:
381 if deltachainspan:
382 density = chainpayload / float(deltachainspan)
382 density = chainpayload / float(deltachainspan)
383 else:
383 else:
384 density = 1.0
384 density = 1.0
385
385
386 if density >= targetdensity:
386 if density >= targetdensity:
387 yield revs
387 yield revs
388 return
388 return
389
389
390 # Store the gaps in a heap to have them sorted by decreasing size
390 # Store the gaps in a heap to have them sorted by decreasing size
391 gaps = []
391 gaps = []
392 prevend = None
392 prevend = None
393 for i, rev in enumerate(revs):
393 for i, rev in enumerate(revs):
394 revstart = start(rev)
394 revstart = start(rev)
395 revlen = length(rev)
395 revlen = length(rev)
396
396
397 # Skip empty revisions to form larger holes
397 # Skip empty revisions to form larger holes
398 if revlen == 0:
398 if revlen == 0:
399 continue
399 continue
400
400
401 if prevend is not None:
401 if prevend is not None:
402 gapsize = revstart - prevend
402 gapsize = revstart - prevend
403 # only consider holes that are large enough
403 # only consider holes that are large enough
404 if gapsize > mingapsize:
404 if gapsize > mingapsize:
405 gaps.append((gapsize, i))
405 gaps.append((gapsize, i))
406
406
407 prevend = revstart + revlen
407 prevend = revstart + revlen
408 # sort the gaps to pop them from largest to small
408 # sort the gaps to pop them from largest to small
409 gaps.sort()
409 gaps.sort()
410
410
411 # Collect the indices of the largest holes until the density is acceptable
411 # Collect the indices of the largest holes until the density is acceptable
412 selected = []
412 selected = []
413 while gaps and density < targetdensity:
413 while gaps and density < targetdensity:
414 gapsize, gapidx = gaps.pop()
414 gapsize, gapidx = gaps.pop()
415
415
416 selected.append(gapidx)
416 selected.append(gapidx)
417
417
418 # the gap sizes are stored as negatives to be sorted decreasingly
418 # the gap sizes are stored as negatives to be sorted decreasingly
419 # by the heap
419 # by the heap
420 readdata -= gapsize
420 readdata -= gapsize
421 if readdata > 0:
421 if readdata > 0:
422 density = chainpayload / float(readdata)
422 density = chainpayload / float(readdata)
423 else:
423 else:
424 density = 1.0
424 density = 1.0
425 selected.sort()
425 selected.sort()
426
426
427 # Cut the revs at collected indices
427 # Cut the revs at collected indices
428 previdx = 0
428 previdx = 0
429 for idx in selected:
429 for idx in selected:
430
430
431 chunk = _trimchunk(revlog, revs, previdx, idx)
431 chunk = _trimchunk(revlog, revs, previdx, idx)
432 if chunk:
432 if chunk:
433 yield chunk
433 yield chunk
434
434
435 previdx = idx
435 previdx = idx
436
436
437 chunk = _trimchunk(revlog, revs, previdx)
437 chunk = _trimchunk(revlog, revs, previdx)
438 if chunk:
438 if chunk:
439 yield chunk
439 yield chunk
440
440
441
441
442 def _trimchunk(revlog, revs, startidx, endidx=None):
442 def _trimchunk(revlog, revs, startidx, endidx=None):
443 """returns revs[startidx:endidx] without empty trailing revs
443 """returns revs[startidx:endidx] without empty trailing revs
444
444
445 Doctest Setup
445 Doctest Setup
446 >>> revlog = _testrevlog([
446 >>> revlog = _testrevlog([
447 ... 5, #0
447 ... 5, #0
448 ... 10, #1
448 ... 10, #1
449 ... 12, #2
449 ... 12, #2
450 ... 12, #3 (empty)
450 ... 12, #3 (empty)
451 ... 17, #4
451 ... 17, #4
452 ... 21, #5
452 ... 21, #5
453 ... 21, #6 (empty)
453 ... 21, #6 (empty)
454 ... ])
454 ... ])
455
455
456 Contiguous cases:
456 Contiguous cases:
457 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
457 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
458 [0, 1, 2, 3, 4, 5]
458 [0, 1, 2, 3, 4, 5]
459 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
459 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
460 [0, 1, 2, 3, 4]
460 [0, 1, 2, 3, 4]
461 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
461 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
462 [0, 1, 2]
462 [0, 1, 2]
463 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
463 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
464 [2]
464 [2]
465 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
465 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
466 [3, 4, 5]
466 [3, 4, 5]
467 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
467 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
468 [3, 4]
468 [3, 4]
469
469
470 Discontiguous cases:
470 Discontiguous cases:
471 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
471 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
472 [1, 3, 5]
472 [1, 3, 5]
473 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
473 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
474 [1]
474 [1]
475 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
475 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
476 [3, 5]
476 [3, 5]
477 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
477 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
478 [3, 5]
478 [3, 5]
479 """
479 """
480 length = revlog.length
480 length = revlog.length
481
481
482 if endidx is None:
482 if endidx is None:
483 endidx = len(revs)
483 endidx = len(revs)
484
484
485 # If we have a non-emtpy delta candidate, there are nothing to trim
485 # If we have a non-emtpy delta candidate, there are nothing to trim
486 if revs[endidx - 1] < len(revlog):
486 if revs[endidx - 1] < len(revlog):
487 # Trim empty revs at the end, except the very first revision of a chain
487 # Trim empty revs at the end, except the very first revision of a chain
488 while (
488 while (
489 endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0
489 endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0
490 ):
490 ):
491 endidx -= 1
491 endidx -= 1
492
492
493 return revs[startidx:endidx]
493 return revs[startidx:endidx]
494
494
495
495
496 def segmentspan(revlog, revs):
496 def segmentspan(revlog, revs):
497 """Get the byte span of a segment of revisions
497 """Get the byte span of a segment of revisions
498
498
499 revs is a sorted array of revision numbers
499 revs is a sorted array of revision numbers
500
500
501 >>> revlog = _testrevlog([
501 >>> revlog = _testrevlog([
502 ... 5, #0
502 ... 5, #0
503 ... 10, #1
503 ... 10, #1
504 ... 12, #2
504 ... 12, #2
505 ... 12, #3 (empty)
505 ... 12, #3 (empty)
506 ... 17, #4
506 ... 17, #4
507 ... ])
507 ... ])
508
508
509 >>> segmentspan(revlog, [0, 1, 2, 3, 4])
509 >>> segmentspan(revlog, [0, 1, 2, 3, 4])
510 17
510 17
511 >>> segmentspan(revlog, [0, 4])
511 >>> segmentspan(revlog, [0, 4])
512 17
512 17
513 >>> segmentspan(revlog, [3, 4])
513 >>> segmentspan(revlog, [3, 4])
514 5
514 5
515 >>> segmentspan(revlog, [1, 2, 3,])
515 >>> segmentspan(revlog, [1, 2, 3,])
516 7
516 7
517 >>> segmentspan(revlog, [1, 3])
517 >>> segmentspan(revlog, [1, 3])
518 7
518 7
519 """
519 """
520 if not revs:
520 if not revs:
521 return 0
521 return 0
522 end = revlog.end(revs[-1])
522 end = revlog.end(revs[-1])
523 return end - revlog.start(revs[0])
523 return end - revlog.start(revs[0])
524
524
525
525
526 def _textfromdelta(revlog, baserev, delta, p1, p2, flags, expectednode):
526 def _textfromdelta(revlog, baserev, delta, p1, p2, flags, expectednode):
527 """build full text from a (base, delta) pair and other metadata"""
527 """build full text from a (base, delta) pair and other metadata"""
528 # special case deltas which replace entire base; no need to decode
528 # special case deltas which replace entire base; no need to decode
529 # base revision. this neatly avoids censored bases, which throw when
529 # base revision. this neatly avoids censored bases, which throw when
530 # they're decoded.
530 # they're decoded.
531 hlen = struct.calcsize(b">lll")
531 hlen = struct.calcsize(b">lll")
532 if delta[:hlen] == mdiff.replacediffheader(
532 if delta[:hlen] == mdiff.replacediffheader(
533 revlog.rawsize(baserev), len(delta) - hlen
533 revlog.rawsize(baserev), len(delta) - hlen
534 ):
534 ):
535 fulltext = delta[hlen:]
535 fulltext = delta[hlen:]
536 else:
536 else:
537 # deltabase is rawtext before changed by flag processors, which is
537 # deltabase is rawtext before changed by flag processors, which is
538 # equivalent to non-raw text
538 # equivalent to non-raw text
539 basetext = revlog.revision(baserev)
539 basetext = revlog.revision(baserev)
540 fulltext = mdiff.patch(basetext, delta)
540 fulltext = mdiff.patch(basetext, delta)
541
541
542 try:
542 try:
543 validatehash = flagutil.processflagsraw(revlog, fulltext, flags)
543 validatehash = flagutil.processflagsraw(revlog, fulltext, flags)
544 if validatehash:
544 if validatehash:
545 revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
545 revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
546 if flags & REVIDX_ISCENSORED:
546 if flags & REVIDX_ISCENSORED:
547 raise error.StorageError(
547 raise error.StorageError(
548 _(b'node %s is not censored') % expectednode
548 _(b'node %s is not censored') % expectednode
549 )
549 )
550 except error.CensoredNodeError:
550 except error.CensoredNodeError:
551 # must pass the censored index flag to add censored revisions
551 # must pass the censored index flag to add censored revisions
552 if not flags & REVIDX_ISCENSORED:
552 if not flags & REVIDX_ISCENSORED:
553 raise
553 raise
554 return fulltext
554 return fulltext
555
555
556
556
557 @attr.s(slots=True, frozen=True)
557 @attr.s(slots=True, frozen=True)
558 class _deltainfo:
558 class _deltainfo:
559 distance = attr.ib()
559 distance = attr.ib()
560 deltalen = attr.ib()
560 deltalen = attr.ib()
561 data = attr.ib()
561 data = attr.ib()
562 base = attr.ib()
562 base = attr.ib()
563 chainbase = attr.ib()
563 chainbase = attr.ib()
564 chainlen = attr.ib()
564 chainlen = attr.ib()
565 compresseddeltalen = attr.ib()
565 compresseddeltalen = attr.ib()
566 snapshotdepth = attr.ib()
566 snapshotdepth = attr.ib()
567
567
568
568
569 def drop_u_compression(delta):
569 def drop_u_compression(delta):
570 """turn into a "u" (no-compression) into no-compression without header
570 """turn into a "u" (no-compression) into no-compression without header
571
571
572 This is useful for revlog format that has better compression method.
572 This is useful for revlog format that has better compression method.
573 """
573 """
574 assert delta.data[0] == b'u', delta.data[0]
574 assert delta.data[0] == b'u', delta.data[0]
575 return _deltainfo(
575 return _deltainfo(
576 delta.distance,
576 delta.distance,
577 delta.deltalen - 1,
577 delta.deltalen - 1,
578 (b'', delta.data[1]),
578 (b'', delta.data[1]),
579 delta.base,
579 delta.base,
580 delta.chainbase,
580 delta.chainbase,
581 delta.chainlen,
581 delta.chainlen,
582 delta.compresseddeltalen,
582 delta.compresseddeltalen,
583 delta.snapshotdepth,
583 delta.snapshotdepth,
584 )
584 )
585
585
586
586
587 def is_good_delta_info(revlog, deltainfo, revinfo):
587 def is_good_delta_info(revlog, deltainfo, revinfo):
588 """Returns True if the given delta is good. Good means that it is within
588 """Returns True if the given delta is good. Good means that it is within
589 the disk span, disk size, and chain length bounds that we know to be
589 the disk span, disk size, and chain length bounds that we know to be
590 performant."""
590 performant."""
591 if deltainfo is None:
591 if deltainfo is None:
592 return False
592 return False
593
593
594 # the DELTA_BASE_REUSE_FORCE case should have been taken care of sooner so
594 # the DELTA_BASE_REUSE_FORCE case should have been taken care of sooner so
595 # we should never end up asking such question. Adding the assert as a
595 # we should never end up asking such question. Adding the assert as a
596 # safe-guard to detect anything that would be fishy in this regard.
596 # safe-guard to detect anything that would be fishy in this regard.
597 assert (
597 assert (
598 revinfo.cachedelta is None
598 revinfo.cachedelta is None
599 or revinfo.cachedelta[2] != DELTA_BASE_REUSE_FORCE
599 or revinfo.cachedelta[2] != DELTA_BASE_REUSE_FORCE
600 or not revlog.delta_config.general_delta
600 or not revlog.delta_config.general_delta
601 )
601 )
602
602
603 # - 'deltainfo.distance' is the distance from the base revision --
603 # - 'deltainfo.distance' is the distance from the base revision --
604 # bounding it limits the amount of I/O we need to do.
604 # bounding it limits the amount of I/O we need to do.
605 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
605 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
606 # deltas we need to apply -- bounding it limits the amount of CPU
606 # deltas we need to apply -- bounding it limits the amount of CPU
607 # we consume.
607 # we consume.
608
608
609 textlen = revinfo.textlen
609 textlen = revinfo.textlen
610 defaultmax = textlen * 4
610 defaultmax = textlen * 4
611 maxdist = revlog._maxdeltachainspan
611 maxdist = revlog._maxdeltachainspan
612 if not maxdist:
612 if not maxdist:
613 maxdist = deltainfo.distance # ensure the conditional pass
613 maxdist = deltainfo.distance # ensure the conditional pass
614 maxdist = max(maxdist, defaultmax)
614 maxdist = max(maxdist, defaultmax)
615
615
616 # Bad delta from read span:
616 # Bad delta from read span:
617 #
617 #
618 # If the span of data read is larger than the maximum allowed.
618 # If the span of data read is larger than the maximum allowed.
619 #
619 #
620 # In the sparse-revlog case, we rely on the associated "sparse reading"
620 # In the sparse-revlog case, we rely on the associated "sparse reading"
621 # to avoid issue related to the span of data. In theory, it would be
621 # to avoid issue related to the span of data. In theory, it would be
622 # possible to build pathological revlog where delta pattern would lead
622 # possible to build pathological revlog where delta pattern would lead
623 # to too many reads. However, they do not happen in practice at all. So
623 # to too many reads. However, they do not happen in practice at all. So
624 # we skip the span check entirely.
624 # we skip the span check entirely.
625 if not revlog._sparserevlog and maxdist < deltainfo.distance:
625 if not revlog._sparserevlog and maxdist < deltainfo.distance:
626 return False
626 return False
627
627
628 # Bad delta from new delta size:
628 # Bad delta from new delta size:
629 #
629 #
630 # If the delta size is larger than the target text, storing the
630 # If the delta size is larger than the target text, storing the
631 # delta will be inefficient.
631 # delta will be inefficient.
632 if textlen < deltainfo.deltalen:
632 if textlen < deltainfo.deltalen:
633 return False
633 return False
634
634
635 # Bad delta from cumulated payload size:
635 # Bad delta from cumulated payload size:
636 #
636 #
637 # If the sum of delta get larger than K * target text length.
637 # If the sum of delta get larger than K * target text length.
638 if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
638 if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
639 return False
639 return False
640
640
641 # Bad delta from chain length:
641 # Bad delta from chain length:
642 #
642 #
643 # If the number of delta in the chain gets too high.
643 # If the number of delta in the chain gets too high.
644 if (
644 if (
645 revlog.delta_config.max_chain_len
645 revlog.delta_config.max_chain_len
646 and revlog.delta_config.max_chain_len < deltainfo.chainlen
646 and revlog.delta_config.max_chain_len < deltainfo.chainlen
647 ):
647 ):
648 return False
648 return False
649
649
650 # bad delta from intermediate snapshot size limit
650 # bad delta from intermediate snapshot size limit
651 #
651 #
652 # If an intermediate snapshot size is higher than the limit. The
652 # If an intermediate snapshot size is higher than the limit. The
653 # limit exist to prevent endless chain of intermediate delta to be
653 # limit exist to prevent endless chain of intermediate delta to be
654 # created.
654 # created.
655 if (
655 if (
656 deltainfo.snapshotdepth is not None
656 deltainfo.snapshotdepth is not None
657 and (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen
657 and (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen
658 ):
658 ):
659 return False
659 return False
660
660
661 # bad delta if new intermediate snapshot is larger than the previous
661 # bad delta if new intermediate snapshot is larger than the previous
662 # snapshot
662 # snapshot
663 if (
663 if (
664 deltainfo.snapshotdepth
664 deltainfo.snapshotdepth
665 and revlog.length(deltainfo.base) < deltainfo.deltalen
665 and revlog.length(deltainfo.base) < deltainfo.deltalen
666 ):
666 ):
667 return False
667 return False
668
668
669 return True
669 return True
670
670
671
671
672 # If a revision's full text is that much bigger than a base candidate full
672 # If a revision's full text is that much bigger than a base candidate full
673 # text's, it is very unlikely that it will produce a valid delta. We no longer
673 # text's, it is very unlikely that it will produce a valid delta. We no longer
674 # consider these candidates.
674 # consider these candidates.
675 LIMIT_BASE2TEXT = 500
675 LIMIT_BASE2TEXT = 500
676
676
677
677
678 def _candidategroups(
678 def _candidategroups(
679 revlog,
679 revlog,
680 textlen,
680 textlen,
681 p1,
681 p1,
682 p2,
682 p2,
683 cachedelta,
683 cachedelta,
684 excluded_bases=None,
684 excluded_bases=None,
685 target_rev=None,
685 target_rev=None,
686 snapshot_cache=None,
686 snapshot_cache=None,
687 ):
687 ):
688 """Provides group of revision to be tested as delta base
688 """Provides group of revision to be tested as delta base
689
689
690 This top level function focus on emitting groups with unique and worthwhile
690 This top level function focus on emitting groups with unique and worthwhile
691 content. See _raw_candidate_groups for details about the group order.
691 content. See _raw_candidate_groups for details about the group order.
692 """
692 """
693 # should we try to build a delta?
693 # should we try to build a delta?
694 if not (len(revlog) and revlog._storedeltachains):
694 if not (len(revlog) and revlog._storedeltachains):
695 yield None
695 yield None
696 return
696 return
697
697
698 if target_rev is None:
698 if target_rev is None:
699 target_rev = len(revlog)
699 target_rev = len(revlog)
700
700
701 if not revlog.delta_config.general_delta:
701 if not revlog.delta_config.general_delta:
702 # before general delta, there is only one possible delta base
702 # before general delta, there is only one possible delta base
703 yield (target_rev - 1,)
703 yield (target_rev - 1,)
704 yield None
704 yield None
705 return
705 return
706
706
707 # the DELTA_BASE_REUSE_FORCE case should have been taken care of sooner so
707 # the DELTA_BASE_REUSE_FORCE case should have been taken care of sooner so
708 # we should never end up asking such question. Adding the assert as a
708 # we should never end up asking such question. Adding the assert as a
709 # safe-guard to detect anything that would be fishy in this regard.
709 # safe-guard to detect anything that would be fishy in this regard.
710 assert (
710 assert (
711 cachedelta is None
711 cachedelta is None
712 or cachedelta[2] != DELTA_BASE_REUSE_FORCE
712 or cachedelta[2] != DELTA_BASE_REUSE_FORCE
713 or not revlog.delta_config.general_delta
713 or not revlog.delta_config.general_delta
714 )
714 )
715
715
716 deltalength = revlog.length
716 deltalength = revlog.length
717 deltaparent = revlog.deltaparent
717 deltaparent = revlog.deltaparent
718 sparse = revlog._sparserevlog
718 sparse = revlog._sparserevlog
719 good = None
719 good = None
720
720
721 deltas_limit = textlen * LIMIT_DELTA2TEXT
721 deltas_limit = textlen * LIMIT_DELTA2TEXT
722 group_chunk_size = revlog._candidate_group_chunk_size
722 group_chunk_size = revlog.delta_config.candidate_group_chunk_size
723
723
724 tested = {nullrev}
724 tested = {nullrev}
725 candidates = _refinedgroups(
725 candidates = _refinedgroups(
726 revlog,
726 revlog,
727 p1,
727 p1,
728 p2,
728 p2,
729 cachedelta,
729 cachedelta,
730 snapshot_cache=snapshot_cache,
730 snapshot_cache=snapshot_cache,
731 )
731 )
732 while True:
732 while True:
733 temptative = candidates.send(good)
733 temptative = candidates.send(good)
734 if temptative is None:
734 if temptative is None:
735 break
735 break
736 group = []
736 group = []
737 for rev in temptative:
737 for rev in temptative:
738 # skip over empty delta (no need to include them in a chain)
738 # skip over empty delta (no need to include them in a chain)
739 while not (rev == nullrev or rev in tested or deltalength(rev)):
739 while not (rev == nullrev or rev in tested or deltalength(rev)):
740 tested.add(rev)
740 tested.add(rev)
741 rev = deltaparent(rev)
741 rev = deltaparent(rev)
742 # no need to try a delta against nullrev, this will be done as a
742 # no need to try a delta against nullrev, this will be done as a
743 # last resort.
743 # last resort.
744 if rev == nullrev:
744 if rev == nullrev:
745 continue
745 continue
746 # filter out revision we tested already
746 # filter out revision we tested already
747 if rev in tested:
747 if rev in tested:
748 continue
748 continue
749
749
750 # an higher authority deamed the base unworthy (e.g. censored)
750 # an higher authority deamed the base unworthy (e.g. censored)
751 if excluded_bases is not None and rev in excluded_bases:
751 if excluded_bases is not None and rev in excluded_bases:
752 tested.add(rev)
752 tested.add(rev)
753 continue
753 continue
754 # We are in some recomputation cases and that rev is too high in
754 # We are in some recomputation cases and that rev is too high in
755 # the revlog
755 # the revlog
756 if target_rev is not None and rev >= target_rev:
756 if target_rev is not None and rev >= target_rev:
757 tested.add(rev)
757 tested.add(rev)
758 continue
758 continue
759 # filter out delta base that will never produce good delta
759 # filter out delta base that will never produce good delta
760 if deltas_limit < revlog.length(rev):
760 if deltas_limit < revlog.length(rev):
761 tested.add(rev)
761 tested.add(rev)
762 continue
762 continue
763 if sparse and revlog.rawsize(rev) < (textlen // LIMIT_BASE2TEXT):
763 if sparse and revlog.rawsize(rev) < (textlen // LIMIT_BASE2TEXT):
764 tested.add(rev)
764 tested.add(rev)
765 continue
765 continue
766 # no delta for rawtext-changing revs (see "candelta" for why)
766 # no delta for rawtext-changing revs (see "candelta" for why)
767 if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
767 if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
768 tested.add(rev)
768 tested.add(rev)
769 continue
769 continue
770
770
771 # If we reach here, we are about to build and test a delta.
771 # If we reach here, we are about to build and test a delta.
772 # The delta building process will compute the chaininfo in all
772 # The delta building process will compute the chaininfo in all
773 # case, since that computation is cached, it is fine to access it
773 # case, since that computation is cached, it is fine to access it
774 # here too.
774 # here too.
775 chainlen, chainsize = revlog._chaininfo(rev)
775 chainlen, chainsize = revlog._chaininfo(rev)
776 # if chain will be too long, skip base
776 # if chain will be too long, skip base
777 if (
777 if (
778 revlog.delta_config.max_chain_len
778 revlog.delta_config.max_chain_len
779 and chainlen >= revlog.delta_config.max_chain_len
779 and chainlen >= revlog.delta_config.max_chain_len
780 ):
780 ):
781 tested.add(rev)
781 tested.add(rev)
782 continue
782 continue
783 # if chain already have too much data, skip base
783 # if chain already have too much data, skip base
784 if deltas_limit < chainsize:
784 if deltas_limit < chainsize:
785 tested.add(rev)
785 tested.add(rev)
786 continue
786 continue
787 if sparse and revlog.upperboundcomp is not None:
787 if sparse and revlog.upperboundcomp is not None:
788 maxcomp = revlog.upperboundcomp
788 maxcomp = revlog.upperboundcomp
789 basenotsnap = (p1, p2, nullrev)
789 basenotsnap = (p1, p2, nullrev)
790 if rev not in basenotsnap and revlog.issnapshot(rev):
790 if rev not in basenotsnap and revlog.issnapshot(rev):
791 snapshotdepth = revlog.snapshotdepth(rev)
791 snapshotdepth = revlog.snapshotdepth(rev)
792 # If text is significantly larger than the base, we can
792 # If text is significantly larger than the base, we can
793 # expect the resulting delta to be proportional to the size
793 # expect the resulting delta to be proportional to the size
794 # difference
794 # difference
795 revsize = revlog.rawsize(rev)
795 revsize = revlog.rawsize(rev)
796 rawsizedistance = max(textlen - revsize, 0)
796 rawsizedistance = max(textlen - revsize, 0)
797 # use an estimate of the compression upper bound.
797 # use an estimate of the compression upper bound.
798 lowestrealisticdeltalen = rawsizedistance // maxcomp
798 lowestrealisticdeltalen = rawsizedistance // maxcomp
799
799
800 # check the absolute constraint on the delta size
800 # check the absolute constraint on the delta size
801 snapshotlimit = textlen >> snapshotdepth
801 snapshotlimit = textlen >> snapshotdepth
802 if snapshotlimit < lowestrealisticdeltalen:
802 if snapshotlimit < lowestrealisticdeltalen:
803 # delta lower bound is larger than accepted upper bound
803 # delta lower bound is larger than accepted upper bound
804 tested.add(rev)
804 tested.add(rev)
805 continue
805 continue
806
806
807 # check the relative constraint on the delta size
807 # check the relative constraint on the delta size
808 revlength = revlog.length(rev)
808 revlength = revlog.length(rev)
809 if revlength < lowestrealisticdeltalen:
809 if revlength < lowestrealisticdeltalen:
810 # delta probable lower bound is larger than target base
810 # delta probable lower bound is larger than target base
811 tested.add(rev)
811 tested.add(rev)
812 continue
812 continue
813
813
814 group.append(rev)
814 group.append(rev)
815 if group:
815 if group:
816 # When the size of the candidate group is big, it can result in a
816 # When the size of the candidate group is big, it can result in a
817 # quite significant performance impact. To reduce this, we can send
817 # quite significant performance impact. To reduce this, we can send
818 # them in smaller batches until the new batch does not provide any
818 # them in smaller batches until the new batch does not provide any
819 # improvements.
819 # improvements.
820 #
820 #
821 # This might reduce the overall efficiency of the compression in
821 # This might reduce the overall efficiency of the compression in
822 # some corner cases, but that should also prevent very pathological
822 # some corner cases, but that should also prevent very pathological
823 # cases from being an issue. (eg. 20 000 candidates).
823 # cases from being an issue. (eg. 20 000 candidates).
824 #
824 #
825 # XXX note that the ordering of the group becomes important as it
825 # XXX note that the ordering of the group becomes important as it
826 # now impacts the final result. The current order is unprocessed
826 # now impacts the final result. The current order is unprocessed
827 # and can be improved.
827 # and can be improved.
828 if group_chunk_size == 0:
828 if group_chunk_size == 0:
829 tested.update(group)
829 tested.update(group)
830 good = yield tuple(group)
830 good = yield tuple(group)
831 else:
831 else:
832 prev_good = good
832 prev_good = good
833 for start in range(0, len(group), group_chunk_size):
833 for start in range(0, len(group), group_chunk_size):
834 sub_group = group[start : start + group_chunk_size]
834 sub_group = group[start : start + group_chunk_size]
835 tested.update(sub_group)
835 tested.update(sub_group)
836 good = yield tuple(sub_group)
836 good = yield tuple(sub_group)
837 if prev_good == good:
837 if prev_good == good:
838 break
838 break
839
839
840 yield None
840 yield None
841
841
842
842
843 def _refinedgroups(revlog, p1, p2, cachedelta, snapshot_cache=None):
843 def _refinedgroups(revlog, p1, p2, cachedelta, snapshot_cache=None):
844 good = None
844 good = None
845 # First we try to reuse a the delta contained in the bundle.
845 # First we try to reuse a the delta contained in the bundle.
846 # (or from the source revlog)
846 # (or from the source revlog)
847 #
847 #
848 # This logic only applies to general delta repositories and can be disabled
848 # This logic only applies to general delta repositories and can be disabled
849 # through configuration. Disabling reuse source delta is useful when
849 # through configuration. Disabling reuse source delta is useful when
850 # we want to make sure we recomputed "optimal" deltas.
850 # we want to make sure we recomputed "optimal" deltas.
851 debug_info = None
851 debug_info = None
852 if cachedelta is not None and cachedelta[2] > DELTA_BASE_REUSE_NO:
852 if cachedelta is not None and cachedelta[2] > DELTA_BASE_REUSE_NO:
853 # Assume what we received from the server is a good choice
853 # Assume what we received from the server is a good choice
854 # build delta will reuse the cache
854 # build delta will reuse the cache
855 if debug_info is not None:
855 if debug_info is not None:
856 debug_info['cached-delta.tested'] += 1
856 debug_info['cached-delta.tested'] += 1
857 good = yield (cachedelta[0],)
857 good = yield (cachedelta[0],)
858 if good is not None:
858 if good is not None:
859 if debug_info is not None:
859 if debug_info is not None:
860 debug_info['cached-delta.accepted'] += 1
860 debug_info['cached-delta.accepted'] += 1
861 yield None
861 yield None
862 return
862 return
863 if snapshot_cache is None:
863 if snapshot_cache is None:
864 snapshot_cache = SnapshotCache()
864 snapshot_cache = SnapshotCache()
865 groups = _rawgroups(
865 groups = _rawgroups(
866 revlog,
866 revlog,
867 p1,
867 p1,
868 p2,
868 p2,
869 cachedelta,
869 cachedelta,
870 snapshot_cache,
870 snapshot_cache,
871 )
871 )
872 for candidates in groups:
872 for candidates in groups:
873 good = yield candidates
873 good = yield candidates
874 if good is not None:
874 if good is not None:
875 break
875 break
876
876
877 # If sparse revlog is enabled, we can try to refine the available deltas
877 # If sparse revlog is enabled, we can try to refine the available deltas
878 if not revlog._sparserevlog:
878 if not revlog._sparserevlog:
879 yield None
879 yield None
880 return
880 return
881
881
882 # if we have a refinable value, try to refine it
882 # if we have a refinable value, try to refine it
883 if good is not None and good not in (p1, p2) and revlog.issnapshot(good):
883 if good is not None and good not in (p1, p2) and revlog.issnapshot(good):
884 # refine snapshot down
884 # refine snapshot down
885 previous = None
885 previous = None
886 while previous != good:
886 while previous != good:
887 previous = good
887 previous = good
888 base = revlog.deltaparent(good)
888 base = revlog.deltaparent(good)
889 if base == nullrev:
889 if base == nullrev:
890 break
890 break
891 good = yield (base,)
891 good = yield (base,)
892 # refine snapshot up
892 # refine snapshot up
893 if not snapshot_cache.snapshots:
893 if not snapshot_cache.snapshots:
894 snapshot_cache.update(revlog, good + 1)
894 snapshot_cache.update(revlog, good + 1)
895 previous = None
895 previous = None
896 while good != previous:
896 while good != previous:
897 previous = good
897 previous = good
898 children = tuple(sorted(c for c in snapshot_cache.snapshots[good]))
898 children = tuple(sorted(c for c in snapshot_cache.snapshots[good]))
899 good = yield children
899 good = yield children
900
900
901 if debug_info is not None:
901 if debug_info is not None:
902 if good is None:
902 if good is None:
903 debug_info['no-solution'] += 1
903 debug_info['no-solution'] += 1
904
904
905 yield None
905 yield None
906
906
907
907
908 def _rawgroups(revlog, p1, p2, cachedelta, snapshot_cache=None):
908 def _rawgroups(revlog, p1, p2, cachedelta, snapshot_cache=None):
909 """Provides group of revision to be tested as delta base
909 """Provides group of revision to be tested as delta base
910
910
911 This lower level function focus on emitting delta theorically interresting
911 This lower level function focus on emitting delta theorically interresting
912 without looking it any practical details.
912 without looking it any practical details.
913
913
914 The group order aims at providing fast or small candidates first.
914 The group order aims at providing fast or small candidates first.
915 """
915 """
916 # Why search for delta base if we cannot use a delta base ?
916 # Why search for delta base if we cannot use a delta base ?
917 assert revlog.delta_config.general_delta
917 assert revlog.delta_config.general_delta
918 # also see issue6056
918 # also see issue6056
919 sparse = revlog._sparserevlog
919 sparse = revlog._sparserevlog
920 curr = len(revlog)
920 curr = len(revlog)
921 prev = curr - 1
921 prev = curr - 1
922 deltachain = lambda rev: revlog._deltachain(rev)[0]
922 deltachain = lambda rev: revlog._deltachain(rev)[0]
923
923
924 # exclude already lazy tested base if any
924 # exclude already lazy tested base if any
925 parents = [p for p in (p1, p2) if p != nullrev]
925 parents = [p for p in (p1, p2) if p != nullrev]
926
926
927 if not revlog.delta_config.delta_both_parents and len(parents) == 2:
927 if not revlog.delta_config.delta_both_parents and len(parents) == 2:
928 parents.sort()
928 parents.sort()
929 # To minimize the chance of having to build a fulltext,
929 # To minimize the chance of having to build a fulltext,
930 # pick first whichever parent is closest to us (max rev)
930 # pick first whichever parent is closest to us (max rev)
931 yield (parents[1],)
931 yield (parents[1],)
932 # then the other one (min rev) if the first did not fit
932 # then the other one (min rev) if the first did not fit
933 yield (parents[0],)
933 yield (parents[0],)
934 elif len(parents) > 0:
934 elif len(parents) > 0:
935 # Test all parents (1 or 2), and keep the best candidate
935 # Test all parents (1 or 2), and keep the best candidate
936 yield parents
936 yield parents
937
937
938 if sparse and parents:
938 if sparse and parents:
939 if snapshot_cache is None:
939 if snapshot_cache is None:
940 # map: base-rev: [snapshot-revs]
940 # map: base-rev: [snapshot-revs]
941 snapshot_cache = SnapshotCache()
941 snapshot_cache = SnapshotCache()
942 # See if we can use an existing snapshot in the parent chains to use as
942 # See if we can use an existing snapshot in the parent chains to use as
943 # a base for a new intermediate-snapshot
943 # a base for a new intermediate-snapshot
944 #
944 #
945 # search for snapshot in parents delta chain
945 # search for snapshot in parents delta chain
946 # map: snapshot-level: snapshot-rev
946 # map: snapshot-level: snapshot-rev
947 parents_snaps = collections.defaultdict(set)
947 parents_snaps = collections.defaultdict(set)
948 candidate_chains = [deltachain(p) for p in parents]
948 candidate_chains = [deltachain(p) for p in parents]
949 for chain in candidate_chains:
949 for chain in candidate_chains:
950 for idx, s in enumerate(chain):
950 for idx, s in enumerate(chain):
951 if not revlog.issnapshot(s):
951 if not revlog.issnapshot(s):
952 break
952 break
953 parents_snaps[idx].add(s)
953 parents_snaps[idx].add(s)
954 snapfloor = min(parents_snaps[0]) + 1
954 snapfloor = min(parents_snaps[0]) + 1
955 snapshot_cache.update(revlog, snapfloor)
955 snapshot_cache.update(revlog, snapfloor)
956 # search for the highest "unrelated" revision
956 # search for the highest "unrelated" revision
957 #
957 #
958 # Adding snapshots used by "unrelated" revision increase the odd we
958 # Adding snapshots used by "unrelated" revision increase the odd we
959 # reuse an independant, yet better snapshot chain.
959 # reuse an independant, yet better snapshot chain.
960 #
960 #
961 # XXX instead of building a set of revisions, we could lazily enumerate
961 # XXX instead of building a set of revisions, we could lazily enumerate
962 # over the chains. That would be more efficient, however we stick to
962 # over the chains. That would be more efficient, however we stick to
963 # simple code for now.
963 # simple code for now.
964 all_revs = set()
964 all_revs = set()
965 for chain in candidate_chains:
965 for chain in candidate_chains:
966 all_revs.update(chain)
966 all_revs.update(chain)
967 other = None
967 other = None
968 for r in revlog.revs(prev, snapfloor):
968 for r in revlog.revs(prev, snapfloor):
969 if r not in all_revs:
969 if r not in all_revs:
970 other = r
970 other = r
971 break
971 break
972 if other is not None:
972 if other is not None:
973 # To avoid unfair competition, we won't use unrelated intermediate
973 # To avoid unfair competition, we won't use unrelated intermediate
974 # snapshot that are deeper than the ones from the parent delta
974 # snapshot that are deeper than the ones from the parent delta
975 # chain.
975 # chain.
976 max_depth = max(parents_snaps.keys())
976 max_depth = max(parents_snaps.keys())
977 chain = deltachain(other)
977 chain = deltachain(other)
978 for depth, s in enumerate(chain):
978 for depth, s in enumerate(chain):
979 if s < snapfloor:
979 if s < snapfloor:
980 continue
980 continue
981 if max_depth < depth:
981 if max_depth < depth:
982 break
982 break
983 if not revlog.issnapshot(s):
983 if not revlog.issnapshot(s):
984 break
984 break
985 parents_snaps[depth].add(s)
985 parents_snaps[depth].add(s)
986 # Test them as possible intermediate snapshot base
986 # Test them as possible intermediate snapshot base
987 # We test them from highest to lowest level. High level one are more
987 # We test them from highest to lowest level. High level one are more
988 # likely to result in small delta
988 # likely to result in small delta
989 floor = None
989 floor = None
990 for idx, snaps in sorted(parents_snaps.items(), reverse=True):
990 for idx, snaps in sorted(parents_snaps.items(), reverse=True):
991 siblings = set()
991 siblings = set()
992 for s in snaps:
992 for s in snaps:
993 siblings.update(snapshot_cache.snapshots[s])
993 siblings.update(snapshot_cache.snapshots[s])
994 # Before considering making a new intermediate snapshot, we check
994 # Before considering making a new intermediate snapshot, we check
995 # if an existing snapshot, children of base we consider, would be
995 # if an existing snapshot, children of base we consider, would be
996 # suitable.
996 # suitable.
997 #
997 #
998 # It give a change to reuse a delta chain "unrelated" to the
998 # It give a change to reuse a delta chain "unrelated" to the
999 # current revision instead of starting our own. Without such
999 # current revision instead of starting our own. Without such
1000 # re-use, topological branches would keep reopening new chains.
1000 # re-use, topological branches would keep reopening new chains.
1001 # Creating more and more snapshot as the repository grow.
1001 # Creating more and more snapshot as the repository grow.
1002
1002
1003 if floor is not None:
1003 if floor is not None:
1004 # We only do this for siblings created after the one in our
1004 # We only do this for siblings created after the one in our
1005 # parent's delta chain. Those created before has less chances
1005 # parent's delta chain. Those created before has less chances
1006 # to be valid base since our ancestors had to create a new
1006 # to be valid base since our ancestors had to create a new
1007 # snapshot.
1007 # snapshot.
1008 siblings = [r for r in siblings if floor < r]
1008 siblings = [r for r in siblings if floor < r]
1009 yield tuple(sorted(siblings))
1009 yield tuple(sorted(siblings))
1010 # then test the base from our parent's delta chain.
1010 # then test the base from our parent's delta chain.
1011 yield tuple(sorted(snaps))
1011 yield tuple(sorted(snaps))
1012 floor = min(snaps)
1012 floor = min(snaps)
1013 # No suitable base found in the parent chain, search if any full
1013 # No suitable base found in the parent chain, search if any full
1014 # snapshots emitted since parent's base would be a suitable base for an
1014 # snapshots emitted since parent's base would be a suitable base for an
1015 # intermediate snapshot.
1015 # intermediate snapshot.
1016 #
1016 #
1017 # It give a chance to reuse a delta chain unrelated to the current
1017 # It give a chance to reuse a delta chain unrelated to the current
1018 # revisions instead of starting our own. Without such re-use,
1018 # revisions instead of starting our own. Without such re-use,
1019 # topological branches would keep reopening new full chains. Creating
1019 # topological branches would keep reopening new full chains. Creating
1020 # more and more snapshot as the repository grow.
1020 # more and more snapshot as the repository grow.
1021 full = [r for r in snapshot_cache.snapshots[nullrev] if snapfloor <= r]
1021 full = [r for r in snapshot_cache.snapshots[nullrev] if snapfloor <= r]
1022 yield tuple(sorted(full))
1022 yield tuple(sorted(full))
1023
1023
1024 if not sparse:
1024 if not sparse:
1025 # other approach failed try against prev to hopefully save us a
1025 # other approach failed try against prev to hopefully save us a
1026 # fulltext.
1026 # fulltext.
1027 yield (prev,)
1027 yield (prev,)
1028
1028
1029
1029
1030 class SnapshotCache:
1030 class SnapshotCache:
1031 __slots__ = ('snapshots', '_start_rev', '_end_rev')
1031 __slots__ = ('snapshots', '_start_rev', '_end_rev')
1032
1032
1033 def __init__(self):
1033 def __init__(self):
1034 self.snapshots = collections.defaultdict(set)
1034 self.snapshots = collections.defaultdict(set)
1035 self._start_rev = None
1035 self._start_rev = None
1036 self._end_rev = None
1036 self._end_rev = None
1037
1037
1038 def update(self, revlog, start_rev=0):
1038 def update(self, revlog, start_rev=0):
1039 """find snapshots from start_rev to tip"""
1039 """find snapshots from start_rev to tip"""
1040 nb_revs = len(revlog)
1040 nb_revs = len(revlog)
1041 end_rev = nb_revs - 1
1041 end_rev = nb_revs - 1
1042 if start_rev > end_rev:
1042 if start_rev > end_rev:
1043 return # range is empty
1043 return # range is empty
1044
1044
1045 if self._start_rev is None:
1045 if self._start_rev is None:
1046 assert self._end_rev is None
1046 assert self._end_rev is None
1047 self._update(revlog, start_rev, end_rev)
1047 self._update(revlog, start_rev, end_rev)
1048 elif not (self._start_rev <= start_rev and end_rev <= self._end_rev):
1048 elif not (self._start_rev <= start_rev and end_rev <= self._end_rev):
1049 if start_rev < self._start_rev:
1049 if start_rev < self._start_rev:
1050 self._update(revlog, start_rev, self._start_rev - 1)
1050 self._update(revlog, start_rev, self._start_rev - 1)
1051 if self._end_rev < end_rev:
1051 if self._end_rev < end_rev:
1052 self._update(revlog, self._end_rev + 1, end_rev)
1052 self._update(revlog, self._end_rev + 1, end_rev)
1053
1053
1054 if self._start_rev is None:
1054 if self._start_rev is None:
1055 assert self._end_rev is None
1055 assert self._end_rev is None
1056 self._end_rev = end_rev
1056 self._end_rev = end_rev
1057 self._start_rev = start_rev
1057 self._start_rev = start_rev
1058 else:
1058 else:
1059 self._start_rev = min(self._start_rev, start_rev)
1059 self._start_rev = min(self._start_rev, start_rev)
1060 self._end_rev = max(self._end_rev, end_rev)
1060 self._end_rev = max(self._end_rev, end_rev)
1061 assert self._start_rev <= self._end_rev, (
1061 assert self._start_rev <= self._end_rev, (
1062 self._start_rev,
1062 self._start_rev,
1063 self._end_rev,
1063 self._end_rev,
1064 )
1064 )
1065
1065
1066 def _update(self, revlog, start_rev, end_rev):
1066 def _update(self, revlog, start_rev, end_rev):
1067 """internal method that actually do update content"""
1067 """internal method that actually do update content"""
1068 assert self._start_rev is None or (
1068 assert self._start_rev is None or (
1069 start_rev < self._start_rev or start_rev > self._end_rev
1069 start_rev < self._start_rev or start_rev > self._end_rev
1070 ), (self._start_rev, self._end_rev, start_rev, end_rev)
1070 ), (self._start_rev, self._end_rev, start_rev, end_rev)
1071 assert self._start_rev is None or (
1071 assert self._start_rev is None or (
1072 end_rev < self._start_rev or end_rev > self._end_rev
1072 end_rev < self._start_rev or end_rev > self._end_rev
1073 ), (self._start_rev, self._end_rev, start_rev, end_rev)
1073 ), (self._start_rev, self._end_rev, start_rev, end_rev)
1074 cache = self.snapshots
1074 cache = self.snapshots
1075 if hasattr(revlog.index, 'findsnapshots'):
1075 if hasattr(revlog.index, 'findsnapshots'):
1076 revlog.index.findsnapshots(cache, start_rev, end_rev)
1076 revlog.index.findsnapshots(cache, start_rev, end_rev)
1077 else:
1077 else:
1078 deltaparent = revlog.deltaparent
1078 deltaparent = revlog.deltaparent
1079 issnapshot = revlog.issnapshot
1079 issnapshot = revlog.issnapshot
1080 for rev in revlog.revs(start_rev, end_rev):
1080 for rev in revlog.revs(start_rev, end_rev):
1081 if issnapshot(rev):
1081 if issnapshot(rev):
1082 cache[deltaparent(rev)].add(rev)
1082 cache[deltaparent(rev)].add(rev)
1083
1083
1084
1084
1085 class deltacomputer:
1085 class deltacomputer:
1086 def __init__(
1086 def __init__(
1087 self,
1087 self,
1088 revlog,
1088 revlog,
1089 write_debug=None,
1089 write_debug=None,
1090 debug_search=False,
1090 debug_search=False,
1091 debug_info=None,
1091 debug_info=None,
1092 ):
1092 ):
1093 self.revlog = revlog
1093 self.revlog = revlog
1094 self._write_debug = write_debug
1094 self._write_debug = write_debug
1095 if write_debug is None:
1095 if write_debug is None:
1096 self._debug_search = False
1096 self._debug_search = False
1097 else:
1097 else:
1098 self._debug_search = debug_search
1098 self._debug_search = debug_search
1099 self._debug_info = debug_info
1099 self._debug_info = debug_info
1100 self._snapshot_cache = SnapshotCache()
1100 self._snapshot_cache = SnapshotCache()
1101
1101
1102 @property
1102 @property
1103 def _gather_debug(self):
1103 def _gather_debug(self):
1104 return self._write_debug is not None or self._debug_info is not None
1104 return self._write_debug is not None or self._debug_info is not None
1105
1105
1106 def buildtext(self, revinfo):
1106 def buildtext(self, revinfo):
1107 """Builds a fulltext version of a revision
1107 """Builds a fulltext version of a revision
1108
1108
1109 revinfo: revisioninfo instance that contains all needed info
1109 revinfo: revisioninfo instance that contains all needed info
1110 """
1110 """
1111 btext = revinfo.btext
1111 btext = revinfo.btext
1112 if btext[0] is not None:
1112 if btext[0] is not None:
1113 return btext[0]
1113 return btext[0]
1114
1114
1115 revlog = self.revlog
1115 revlog = self.revlog
1116 cachedelta = revinfo.cachedelta
1116 cachedelta = revinfo.cachedelta
1117 baserev = cachedelta[0]
1117 baserev = cachedelta[0]
1118 delta = cachedelta[1]
1118 delta = cachedelta[1]
1119
1119
1120 fulltext = btext[0] = _textfromdelta(
1120 fulltext = btext[0] = _textfromdelta(
1121 revlog,
1121 revlog,
1122 baserev,
1122 baserev,
1123 delta,
1123 delta,
1124 revinfo.p1,
1124 revinfo.p1,
1125 revinfo.p2,
1125 revinfo.p2,
1126 revinfo.flags,
1126 revinfo.flags,
1127 revinfo.node,
1127 revinfo.node,
1128 )
1128 )
1129 return fulltext
1129 return fulltext
1130
1130
1131 def _builddeltadiff(self, base, revinfo):
1131 def _builddeltadiff(self, base, revinfo):
1132 revlog = self.revlog
1132 revlog = self.revlog
1133 t = self.buildtext(revinfo)
1133 t = self.buildtext(revinfo)
1134 if revlog.iscensored(base):
1134 if revlog.iscensored(base):
1135 # deltas based on a censored revision must replace the
1135 # deltas based on a censored revision must replace the
1136 # full content in one patch, so delta works everywhere
1136 # full content in one patch, so delta works everywhere
1137 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
1137 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
1138 delta = header + t
1138 delta = header + t
1139 else:
1139 else:
1140 ptext = revlog.rawdata(base)
1140 ptext = revlog.rawdata(base)
1141 delta = mdiff.textdiff(ptext, t)
1141 delta = mdiff.textdiff(ptext, t)
1142
1142
1143 return delta
1143 return delta
1144
1144
1145 def _builddeltainfo(self, revinfo, base, target_rev=None):
1145 def _builddeltainfo(self, revinfo, base, target_rev=None):
1146 # can we use the cached delta?
1146 # can we use the cached delta?
1147 revlog = self.revlog
1147 revlog = self.revlog
1148 chainbase = revlog.chainbase(base)
1148 chainbase = revlog.chainbase(base)
1149 if revlog.delta_config.general_delta:
1149 if revlog.delta_config.general_delta:
1150 deltabase = base
1150 deltabase = base
1151 else:
1151 else:
1152 if target_rev is not None and base != target_rev - 1:
1152 if target_rev is not None and base != target_rev - 1:
1153 msg = (
1153 msg = (
1154 b'general delta cannot use delta for something else '
1154 b'general delta cannot use delta for something else '
1155 b'than `prev`: %d<-%d'
1155 b'than `prev`: %d<-%d'
1156 )
1156 )
1157 msg %= (base, target_rev)
1157 msg %= (base, target_rev)
1158 raise error.ProgrammingError(msg)
1158 raise error.ProgrammingError(msg)
1159 deltabase = chainbase
1159 deltabase = chainbase
1160 snapshotdepth = None
1160 snapshotdepth = None
1161 if revlog._sparserevlog and deltabase == nullrev:
1161 if revlog._sparserevlog and deltabase == nullrev:
1162 snapshotdepth = 0
1162 snapshotdepth = 0
1163 elif revlog._sparserevlog and revlog.issnapshot(deltabase):
1163 elif revlog._sparserevlog and revlog.issnapshot(deltabase):
1164 # A delta chain should always be one full snapshot,
1164 # A delta chain should always be one full snapshot,
1165 # zero or more semi-snapshots, and zero or more deltas
1165 # zero or more semi-snapshots, and zero or more deltas
1166 p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
1166 p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
1167 if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
1167 if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
1168 snapshotdepth = len(revlog._deltachain(deltabase)[0])
1168 snapshotdepth = len(revlog._deltachain(deltabase)[0])
1169 delta = None
1169 delta = None
1170 if revinfo.cachedelta:
1170 if revinfo.cachedelta:
1171 cachebase = revinfo.cachedelta[0]
1171 cachebase = revinfo.cachedelta[0]
1172 # check if the diff still apply
1172 # check if the diff still apply
1173 currentbase = cachebase
1173 currentbase = cachebase
1174 while (
1174 while (
1175 currentbase != nullrev
1175 currentbase != nullrev
1176 and currentbase != base
1176 and currentbase != base
1177 and self.revlog.length(currentbase) == 0
1177 and self.revlog.length(currentbase) == 0
1178 ):
1178 ):
1179 currentbase = self.revlog.deltaparent(currentbase)
1179 currentbase = self.revlog.deltaparent(currentbase)
1180 if self.revlog._lazydelta and currentbase == base:
1180 if self.revlog._lazydelta and currentbase == base:
1181 delta = revinfo.cachedelta[1]
1181 delta = revinfo.cachedelta[1]
1182 if delta is None:
1182 if delta is None:
1183 delta = self._builddeltadiff(base, revinfo)
1183 delta = self._builddeltadiff(base, revinfo)
1184 if self._debug_search:
1184 if self._debug_search:
1185 msg = b"DBG-DELTAS-SEARCH: uncompressed-delta-size=%d\n"
1185 msg = b"DBG-DELTAS-SEARCH: uncompressed-delta-size=%d\n"
1186 msg %= len(delta)
1186 msg %= len(delta)
1187 self._write_debug(msg)
1187 self._write_debug(msg)
1188 # snapshotdept need to be neither None nor 0 level snapshot
1188 # snapshotdept need to be neither None nor 0 level snapshot
1189 if revlog.upperboundcomp is not None and snapshotdepth:
1189 if revlog.upperboundcomp is not None and snapshotdepth:
1190 lowestrealisticdeltalen = len(delta) // revlog.upperboundcomp
1190 lowestrealisticdeltalen = len(delta) // revlog.upperboundcomp
1191 snapshotlimit = revinfo.textlen >> snapshotdepth
1191 snapshotlimit = revinfo.textlen >> snapshotdepth
1192 if self._debug_search:
1192 if self._debug_search:
1193 msg = b"DBG-DELTAS-SEARCH: projected-lower-size=%d\n"
1193 msg = b"DBG-DELTAS-SEARCH: projected-lower-size=%d\n"
1194 msg %= lowestrealisticdeltalen
1194 msg %= lowestrealisticdeltalen
1195 self._write_debug(msg)
1195 self._write_debug(msg)
1196 if snapshotlimit < lowestrealisticdeltalen:
1196 if snapshotlimit < lowestrealisticdeltalen:
1197 if self._debug_search:
1197 if self._debug_search:
1198 msg = b"DBG-DELTAS-SEARCH: DISCARDED (snapshot limit)\n"
1198 msg = b"DBG-DELTAS-SEARCH: DISCARDED (snapshot limit)\n"
1199 self._write_debug(msg)
1199 self._write_debug(msg)
1200 return None
1200 return None
1201 if revlog.length(base) < lowestrealisticdeltalen:
1201 if revlog.length(base) < lowestrealisticdeltalen:
1202 if self._debug_search:
1202 if self._debug_search:
1203 msg = b"DBG-DELTAS-SEARCH: DISCARDED (prev size)\n"
1203 msg = b"DBG-DELTAS-SEARCH: DISCARDED (prev size)\n"
1204 self._write_debug(msg)
1204 self._write_debug(msg)
1205 return None
1205 return None
1206 header, data = revlog.compress(delta)
1206 header, data = revlog.compress(delta)
1207 deltalen = len(header) + len(data)
1207 deltalen = len(header) + len(data)
1208 offset = revlog.end(len(revlog) - 1)
1208 offset = revlog.end(len(revlog) - 1)
1209 dist = deltalen + offset - revlog.start(chainbase)
1209 dist = deltalen + offset - revlog.start(chainbase)
1210 chainlen, compresseddeltalen = revlog._chaininfo(base)
1210 chainlen, compresseddeltalen = revlog._chaininfo(base)
1211 chainlen += 1
1211 chainlen += 1
1212 compresseddeltalen += deltalen
1212 compresseddeltalen += deltalen
1213
1213
1214 return _deltainfo(
1214 return _deltainfo(
1215 dist,
1215 dist,
1216 deltalen,
1216 deltalen,
1217 (header, data),
1217 (header, data),
1218 deltabase,
1218 deltabase,
1219 chainbase,
1219 chainbase,
1220 chainlen,
1220 chainlen,
1221 compresseddeltalen,
1221 compresseddeltalen,
1222 snapshotdepth,
1222 snapshotdepth,
1223 )
1223 )
1224
1224
1225 def _fullsnapshotinfo(self, revinfo, curr):
1225 def _fullsnapshotinfo(self, revinfo, curr):
1226 rawtext = self.buildtext(revinfo)
1226 rawtext = self.buildtext(revinfo)
1227 data = self.revlog.compress(rawtext)
1227 data = self.revlog.compress(rawtext)
1228 compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
1228 compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
1229 deltabase = chainbase = curr
1229 deltabase = chainbase = curr
1230 snapshotdepth = 0
1230 snapshotdepth = 0
1231 chainlen = 1
1231 chainlen = 1
1232
1232
1233 return _deltainfo(
1233 return _deltainfo(
1234 dist,
1234 dist,
1235 deltalen,
1235 deltalen,
1236 data,
1236 data,
1237 deltabase,
1237 deltabase,
1238 chainbase,
1238 chainbase,
1239 chainlen,
1239 chainlen,
1240 compresseddeltalen,
1240 compresseddeltalen,
1241 snapshotdepth,
1241 snapshotdepth,
1242 )
1242 )
1243
1243
1244 def finddeltainfo(self, revinfo, excluded_bases=None, target_rev=None):
1244 def finddeltainfo(self, revinfo, excluded_bases=None, target_rev=None):
1245 """Find an acceptable delta against a candidate revision
1245 """Find an acceptable delta against a candidate revision
1246
1246
1247 revinfo: information about the revision (instance of _revisioninfo)
1247 revinfo: information about the revision (instance of _revisioninfo)
1248
1248
1249 Returns the first acceptable candidate revision, as ordered by
1249 Returns the first acceptable candidate revision, as ordered by
1250 _candidategroups
1250 _candidategroups
1251
1251
1252 If no suitable deltabase is found, we return delta info for a full
1252 If no suitable deltabase is found, we return delta info for a full
1253 snapshot.
1253 snapshot.
1254
1254
1255 `excluded_bases` is an optional set of revision that cannot be used as
1255 `excluded_bases` is an optional set of revision that cannot be used as
1256 a delta base. Use this to recompute delta suitable in censor or strip
1256 a delta base. Use this to recompute delta suitable in censor or strip
1257 context.
1257 context.
1258 """
1258 """
1259 if target_rev is None:
1259 if target_rev is None:
1260 target_rev = len(self.revlog)
1260 target_rev = len(self.revlog)
1261
1261
1262 gather_debug = self._gather_debug
1262 gather_debug = self._gather_debug
1263 cachedelta = revinfo.cachedelta
1263 cachedelta = revinfo.cachedelta
1264 revlog = self.revlog
1264 revlog = self.revlog
1265 p1r = p2r = None
1265 p1r = p2r = None
1266
1266
1267 if excluded_bases is None:
1267 if excluded_bases is None:
1268 excluded_bases = set()
1268 excluded_bases = set()
1269
1269
1270 if gather_debug:
1270 if gather_debug:
1271 start = util.timer()
1271 start = util.timer()
1272 dbg = self._one_dbg_data()
1272 dbg = self._one_dbg_data()
1273 dbg['revision'] = target_rev
1273 dbg['revision'] = target_rev
1274 target_revlog = b"UNKNOWN"
1274 target_revlog = b"UNKNOWN"
1275 target_type = self.revlog.target[0]
1275 target_type = self.revlog.target[0]
1276 target_key = self.revlog.target[1]
1276 target_key = self.revlog.target[1]
1277 if target_type == KIND_CHANGELOG:
1277 if target_type == KIND_CHANGELOG:
1278 target_revlog = b'CHANGELOG:'
1278 target_revlog = b'CHANGELOG:'
1279 elif target_type == KIND_MANIFESTLOG:
1279 elif target_type == KIND_MANIFESTLOG:
1280 target_revlog = b'MANIFESTLOG:'
1280 target_revlog = b'MANIFESTLOG:'
1281 if target_key:
1281 if target_key:
1282 target_revlog += b'%s:' % target_key
1282 target_revlog += b'%s:' % target_key
1283 elif target_type == KIND_FILELOG:
1283 elif target_type == KIND_FILELOG:
1284 target_revlog = b'FILELOG:'
1284 target_revlog = b'FILELOG:'
1285 if target_key:
1285 if target_key:
1286 target_revlog += b'%s:' % target_key
1286 target_revlog += b'%s:' % target_key
1287 dbg['target-revlog'] = target_revlog
1287 dbg['target-revlog'] = target_revlog
1288 p1r = revlog.rev(revinfo.p1)
1288 p1r = revlog.rev(revinfo.p1)
1289 p2r = revlog.rev(revinfo.p2)
1289 p2r = revlog.rev(revinfo.p2)
1290 if p1r != nullrev:
1290 if p1r != nullrev:
1291 p1_chain_len = revlog._chaininfo(p1r)[0]
1291 p1_chain_len = revlog._chaininfo(p1r)[0]
1292 else:
1292 else:
1293 p1_chain_len = -1
1293 p1_chain_len = -1
1294 if p2r != nullrev:
1294 if p2r != nullrev:
1295 p2_chain_len = revlog._chaininfo(p2r)[0]
1295 p2_chain_len = revlog._chaininfo(p2r)[0]
1296 else:
1296 else:
1297 p2_chain_len = -1
1297 p2_chain_len = -1
1298 dbg['p1-chain-len'] = p1_chain_len
1298 dbg['p1-chain-len'] = p1_chain_len
1299 dbg['p2-chain-len'] = p2_chain_len
1299 dbg['p2-chain-len'] = p2_chain_len
1300
1300
1301 # 1) if the revision is empty, no amount of delta can beat it
1301 # 1) if the revision is empty, no amount of delta can beat it
1302 #
1302 #
1303 # 2) no delta for flag processor revision (see "candelta" for why)
1303 # 2) no delta for flag processor revision (see "candelta" for why)
1304 # not calling candelta since only one revision needs test, also to
1304 # not calling candelta since only one revision needs test, also to
1305 # avoid overhead fetching flags again.
1305 # avoid overhead fetching flags again.
1306 if not revinfo.textlen or revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
1306 if not revinfo.textlen or revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
1307 deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
1307 deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
1308 if gather_debug:
1308 if gather_debug:
1309 end = util.timer()
1309 end = util.timer()
1310 dbg['duration'] = end - start
1310 dbg['duration'] = end - start
1311 dbg[
1311 dbg[
1312 'delta-base'
1312 'delta-base'
1313 ] = deltainfo.base # pytype: disable=attribute-error
1313 ] = deltainfo.base # pytype: disable=attribute-error
1314 dbg['search_round_count'] = 0
1314 dbg['search_round_count'] = 0
1315 dbg['using-cached-base'] = False
1315 dbg['using-cached-base'] = False
1316 dbg['delta_try_count'] = 0
1316 dbg['delta_try_count'] = 0
1317 dbg['type'] = b"full"
1317 dbg['type'] = b"full"
1318 dbg['snapshot-depth'] = 0
1318 dbg['snapshot-depth'] = 0
1319 self._dbg_process_data(dbg)
1319 self._dbg_process_data(dbg)
1320 return deltainfo
1320 return deltainfo
1321
1321
1322 deltainfo = None
1322 deltainfo = None
1323
1323
1324 # If this source delta are to be forcibly reuse, let us comply early.
1324 # If this source delta are to be forcibly reuse, let us comply early.
1325 if (
1325 if (
1326 revlog.delta_config.general_delta
1326 revlog.delta_config.general_delta
1327 and revinfo.cachedelta is not None
1327 and revinfo.cachedelta is not None
1328 and revinfo.cachedelta[2] == DELTA_BASE_REUSE_FORCE
1328 and revinfo.cachedelta[2] == DELTA_BASE_REUSE_FORCE
1329 ):
1329 ):
1330 base = revinfo.cachedelta[0]
1330 base = revinfo.cachedelta[0]
1331 if base == nullrev:
1331 if base == nullrev:
1332 dbg_type = b"full"
1332 dbg_type = b"full"
1333 deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
1333 deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
1334 if gather_debug:
1334 if gather_debug:
1335 snapshotdepth = 0
1335 snapshotdepth = 0
1336 elif base not in excluded_bases:
1336 elif base not in excluded_bases:
1337 delta = revinfo.cachedelta[1]
1337 delta = revinfo.cachedelta[1]
1338 header, data = revlog.compress(delta)
1338 header, data = revlog.compress(delta)
1339 deltalen = len(header) + len(data)
1339 deltalen = len(header) + len(data)
1340 if gather_debug:
1340 if gather_debug:
1341 offset = revlog.end(len(revlog) - 1)
1341 offset = revlog.end(len(revlog) - 1)
1342 chainbase = revlog.chainbase(base)
1342 chainbase = revlog.chainbase(base)
1343 distance = deltalen + offset - revlog.start(chainbase)
1343 distance = deltalen + offset - revlog.start(chainbase)
1344 chainlen, compresseddeltalen = revlog._chaininfo(base)
1344 chainlen, compresseddeltalen = revlog._chaininfo(base)
1345 chainlen += 1
1345 chainlen += 1
1346 compresseddeltalen += deltalen
1346 compresseddeltalen += deltalen
1347 if base == p1r or base == p2r:
1347 if base == p1r or base == p2r:
1348 dbg_type = b"delta"
1348 dbg_type = b"delta"
1349 snapshotdepth = None
1349 snapshotdepth = None
1350 elif not revlog.issnapshot(base):
1350 elif not revlog.issnapshot(base):
1351 snapshotdepth = None
1351 snapshotdepth = None
1352 else:
1352 else:
1353 dbg_type = b"snapshot"
1353 dbg_type = b"snapshot"
1354 snapshotdepth = revlog.snapshotdepth(base) + 1
1354 snapshotdepth = revlog.snapshotdepth(base) + 1
1355 else:
1355 else:
1356 distance = None
1356 distance = None
1357 chainbase = None
1357 chainbase = None
1358 chainlen = None
1358 chainlen = None
1359 compresseddeltalen = None
1359 compresseddeltalen = None
1360 snapshotdepth = None
1360 snapshotdepth = None
1361 deltainfo = _deltainfo(
1361 deltainfo = _deltainfo(
1362 distance=distance,
1362 distance=distance,
1363 deltalen=deltalen,
1363 deltalen=deltalen,
1364 data=(header, data),
1364 data=(header, data),
1365 base=base,
1365 base=base,
1366 chainbase=chainbase,
1366 chainbase=chainbase,
1367 chainlen=chainlen,
1367 chainlen=chainlen,
1368 compresseddeltalen=compresseddeltalen,
1368 compresseddeltalen=compresseddeltalen,
1369 snapshotdepth=snapshotdepth,
1369 snapshotdepth=snapshotdepth,
1370 )
1370 )
1371
1371
1372 if deltainfo is not None:
1372 if deltainfo is not None:
1373 if gather_debug:
1373 if gather_debug:
1374 end = util.timer()
1374 end = util.timer()
1375 dbg['duration'] = end - start
1375 dbg['duration'] = end - start
1376 dbg[
1376 dbg[
1377 'delta-base'
1377 'delta-base'
1378 ] = deltainfo.base # pytype: disable=attribute-error
1378 ] = deltainfo.base # pytype: disable=attribute-error
1379 dbg['search_round_count'] = 0
1379 dbg['search_round_count'] = 0
1380 dbg['using-cached-base'] = True
1380 dbg['using-cached-base'] = True
1381 dbg['delta_try_count'] = 0
1381 dbg['delta_try_count'] = 0
1382 dbg['type'] = b"full"
1382 dbg['type'] = b"full"
1383 if snapshotdepth is None:
1383 if snapshotdepth is None:
1384 dbg['snapshot-depth'] = 0
1384 dbg['snapshot-depth'] = 0
1385 else:
1385 else:
1386 dbg['snapshot-depth'] = snapshotdepth
1386 dbg['snapshot-depth'] = snapshotdepth
1387 self._dbg_process_data(dbg)
1387 self._dbg_process_data(dbg)
1388 return deltainfo
1388 return deltainfo
1389
1389
1390 # count the number of different delta we tried (for debug purpose)
1390 # count the number of different delta we tried (for debug purpose)
1391 dbg_try_count = 0
1391 dbg_try_count = 0
1392 # count the number of "search round" we did. (for debug purpose)
1392 # count the number of "search round" we did. (for debug purpose)
1393 dbg_try_rounds = 0
1393 dbg_try_rounds = 0
1394 dbg_type = b'unknown'
1394 dbg_type = b'unknown'
1395
1395
1396 if p1r is None:
1396 if p1r is None:
1397 p1r = revlog.rev(revinfo.p1)
1397 p1r = revlog.rev(revinfo.p1)
1398 p2r = revlog.rev(revinfo.p2)
1398 p2r = revlog.rev(revinfo.p2)
1399
1399
1400 if self._debug_search:
1400 if self._debug_search:
1401 msg = b"DBG-DELTAS-SEARCH: SEARCH rev=%d\n"
1401 msg = b"DBG-DELTAS-SEARCH: SEARCH rev=%d\n"
1402 msg %= target_rev
1402 msg %= target_rev
1403 self._write_debug(msg)
1403 self._write_debug(msg)
1404
1404
1405 groups = _candidategroups(
1405 groups = _candidategroups(
1406 self.revlog,
1406 self.revlog,
1407 revinfo.textlen,
1407 revinfo.textlen,
1408 p1r,
1408 p1r,
1409 p2r,
1409 p2r,
1410 cachedelta,
1410 cachedelta,
1411 excluded_bases,
1411 excluded_bases,
1412 target_rev,
1412 target_rev,
1413 snapshot_cache=self._snapshot_cache,
1413 snapshot_cache=self._snapshot_cache,
1414 )
1414 )
1415 candidaterevs = next(groups)
1415 candidaterevs = next(groups)
1416 while candidaterevs is not None:
1416 while candidaterevs is not None:
1417 dbg_try_rounds += 1
1417 dbg_try_rounds += 1
1418 if self._debug_search:
1418 if self._debug_search:
1419 prev = None
1419 prev = None
1420 if deltainfo is not None:
1420 if deltainfo is not None:
1421 prev = deltainfo.base
1421 prev = deltainfo.base
1422
1422
1423 if (
1423 if (
1424 cachedelta is not None
1424 cachedelta is not None
1425 and len(candidaterevs) == 1
1425 and len(candidaterevs) == 1
1426 and cachedelta[0] in candidaterevs
1426 and cachedelta[0] in candidaterevs
1427 ):
1427 ):
1428 round_type = b"cached-delta"
1428 round_type = b"cached-delta"
1429 elif p1r in candidaterevs or p2r in candidaterevs:
1429 elif p1r in candidaterevs or p2r in candidaterevs:
1430 round_type = b"parents"
1430 round_type = b"parents"
1431 elif prev is not None and all(c < prev for c in candidaterevs):
1431 elif prev is not None and all(c < prev for c in candidaterevs):
1432 round_type = b"refine-down"
1432 round_type = b"refine-down"
1433 elif prev is not None and all(c > prev for c in candidaterevs):
1433 elif prev is not None and all(c > prev for c in candidaterevs):
1434 round_type = b"refine-up"
1434 round_type = b"refine-up"
1435 else:
1435 else:
1436 round_type = b"search-down"
1436 round_type = b"search-down"
1437 msg = b"DBG-DELTAS-SEARCH: ROUND #%d - %d candidates - %s\n"
1437 msg = b"DBG-DELTAS-SEARCH: ROUND #%d - %d candidates - %s\n"
1438 msg %= (dbg_try_rounds, len(candidaterevs), round_type)
1438 msg %= (dbg_try_rounds, len(candidaterevs), round_type)
1439 self._write_debug(msg)
1439 self._write_debug(msg)
1440 nominateddeltas = []
1440 nominateddeltas = []
1441 if deltainfo is not None:
1441 if deltainfo is not None:
1442 if self._debug_search:
1442 if self._debug_search:
1443 msg = (
1443 msg = (
1444 b"DBG-DELTAS-SEARCH: CONTENDER: rev=%d - length=%d\n"
1444 b"DBG-DELTAS-SEARCH: CONTENDER: rev=%d - length=%d\n"
1445 )
1445 )
1446 msg %= (deltainfo.base, deltainfo.deltalen)
1446 msg %= (deltainfo.base, deltainfo.deltalen)
1447 self._write_debug(msg)
1447 self._write_debug(msg)
1448 # if we already found a good delta,
1448 # if we already found a good delta,
1449 # challenge it against refined candidates
1449 # challenge it against refined candidates
1450 nominateddeltas.append(deltainfo)
1450 nominateddeltas.append(deltainfo)
1451 for candidaterev in candidaterevs:
1451 for candidaterev in candidaterevs:
1452 if self._debug_search:
1452 if self._debug_search:
1453 msg = b"DBG-DELTAS-SEARCH: CANDIDATE: rev=%d\n"
1453 msg = b"DBG-DELTAS-SEARCH: CANDIDATE: rev=%d\n"
1454 msg %= candidaterev
1454 msg %= candidaterev
1455 self._write_debug(msg)
1455 self._write_debug(msg)
1456 candidate_type = None
1456 candidate_type = None
1457 if candidaterev == p1r:
1457 if candidaterev == p1r:
1458 candidate_type = b"p1"
1458 candidate_type = b"p1"
1459 elif candidaterev == p2r:
1459 elif candidaterev == p2r:
1460 candidate_type = b"p2"
1460 candidate_type = b"p2"
1461 elif self.revlog.issnapshot(candidaterev):
1461 elif self.revlog.issnapshot(candidaterev):
1462 candidate_type = b"snapshot-%d"
1462 candidate_type = b"snapshot-%d"
1463 candidate_type %= self.revlog.snapshotdepth(
1463 candidate_type %= self.revlog.snapshotdepth(
1464 candidaterev
1464 candidaterev
1465 )
1465 )
1466
1466
1467 if candidate_type is not None:
1467 if candidate_type is not None:
1468 msg = b"DBG-DELTAS-SEARCH: type=%s\n"
1468 msg = b"DBG-DELTAS-SEARCH: type=%s\n"
1469 msg %= candidate_type
1469 msg %= candidate_type
1470 self._write_debug(msg)
1470 self._write_debug(msg)
1471 msg = b"DBG-DELTAS-SEARCH: size=%d\n"
1471 msg = b"DBG-DELTAS-SEARCH: size=%d\n"
1472 msg %= self.revlog.length(candidaterev)
1472 msg %= self.revlog.length(candidaterev)
1473 self._write_debug(msg)
1473 self._write_debug(msg)
1474 msg = b"DBG-DELTAS-SEARCH: base=%d\n"
1474 msg = b"DBG-DELTAS-SEARCH: base=%d\n"
1475 msg %= self.revlog.deltaparent(candidaterev)
1475 msg %= self.revlog.deltaparent(candidaterev)
1476 self._write_debug(msg)
1476 self._write_debug(msg)
1477
1477
1478 dbg_try_count += 1
1478 dbg_try_count += 1
1479
1479
1480 if self._debug_search:
1480 if self._debug_search:
1481 delta_start = util.timer()
1481 delta_start = util.timer()
1482 candidatedelta = self._builddeltainfo(
1482 candidatedelta = self._builddeltainfo(
1483 revinfo,
1483 revinfo,
1484 candidaterev,
1484 candidaterev,
1485 target_rev=target_rev,
1485 target_rev=target_rev,
1486 )
1486 )
1487 if self._debug_search:
1487 if self._debug_search:
1488 delta_end = util.timer()
1488 delta_end = util.timer()
1489 msg = b"DBG-DELTAS-SEARCH: delta-search-time=%f\n"
1489 msg = b"DBG-DELTAS-SEARCH: delta-search-time=%f\n"
1490 msg %= delta_end - delta_start
1490 msg %= delta_end - delta_start
1491 self._write_debug(msg)
1491 self._write_debug(msg)
1492 if candidatedelta is not None:
1492 if candidatedelta is not None:
1493 if is_good_delta_info(self.revlog, candidatedelta, revinfo):
1493 if is_good_delta_info(self.revlog, candidatedelta, revinfo):
1494 if self._debug_search:
1494 if self._debug_search:
1495 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (GOOD)\n"
1495 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (GOOD)\n"
1496 msg %= candidatedelta.deltalen
1496 msg %= candidatedelta.deltalen
1497 self._write_debug(msg)
1497 self._write_debug(msg)
1498 nominateddeltas.append(candidatedelta)
1498 nominateddeltas.append(candidatedelta)
1499 elif self._debug_search:
1499 elif self._debug_search:
1500 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (BAD)\n"
1500 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (BAD)\n"
1501 msg %= candidatedelta.deltalen
1501 msg %= candidatedelta.deltalen
1502 self._write_debug(msg)
1502 self._write_debug(msg)
1503 elif self._debug_search:
1503 elif self._debug_search:
1504 msg = b"DBG-DELTAS-SEARCH: NO-DELTA\n"
1504 msg = b"DBG-DELTAS-SEARCH: NO-DELTA\n"
1505 self._write_debug(msg)
1505 self._write_debug(msg)
1506 if nominateddeltas:
1506 if nominateddeltas:
1507 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
1507 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
1508 if deltainfo is not None:
1508 if deltainfo is not None:
1509 candidaterevs = groups.send(deltainfo.base)
1509 candidaterevs = groups.send(deltainfo.base)
1510 else:
1510 else:
1511 candidaterevs = next(groups)
1511 candidaterevs = next(groups)
1512
1512
1513 if deltainfo is None:
1513 if deltainfo is None:
1514 dbg_type = b"full"
1514 dbg_type = b"full"
1515 deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
1515 deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
1516 elif deltainfo.snapshotdepth: # pytype: disable=attribute-error
1516 elif deltainfo.snapshotdepth: # pytype: disable=attribute-error
1517 dbg_type = b"snapshot"
1517 dbg_type = b"snapshot"
1518 else:
1518 else:
1519 dbg_type = b"delta"
1519 dbg_type = b"delta"
1520
1520
1521 if gather_debug:
1521 if gather_debug:
1522 end = util.timer()
1522 end = util.timer()
1523 if dbg_type == b'full':
1523 if dbg_type == b'full':
1524 used_cached = (
1524 used_cached = (
1525 cachedelta is not None
1525 cachedelta is not None
1526 and dbg_try_rounds == 0
1526 and dbg_try_rounds == 0
1527 and dbg_try_count == 0
1527 and dbg_try_count == 0
1528 and cachedelta[0] == nullrev
1528 and cachedelta[0] == nullrev
1529 )
1529 )
1530 else:
1530 else:
1531 used_cached = (
1531 used_cached = (
1532 cachedelta is not None
1532 cachedelta is not None
1533 and dbg_try_rounds == 1
1533 and dbg_try_rounds == 1
1534 and dbg_try_count == 1
1534 and dbg_try_count == 1
1535 and deltainfo.base == cachedelta[0]
1535 and deltainfo.base == cachedelta[0]
1536 )
1536 )
1537 dbg['duration'] = end - start
1537 dbg['duration'] = end - start
1538 dbg[
1538 dbg[
1539 'delta-base'
1539 'delta-base'
1540 ] = deltainfo.base # pytype: disable=attribute-error
1540 ] = deltainfo.base # pytype: disable=attribute-error
1541 dbg['search_round_count'] = dbg_try_rounds
1541 dbg['search_round_count'] = dbg_try_rounds
1542 dbg['using-cached-base'] = used_cached
1542 dbg['using-cached-base'] = used_cached
1543 dbg['delta_try_count'] = dbg_try_count
1543 dbg['delta_try_count'] = dbg_try_count
1544 dbg['type'] = dbg_type
1544 dbg['type'] = dbg_type
1545 if (
1545 if (
1546 deltainfo.snapshotdepth # pytype: disable=attribute-error
1546 deltainfo.snapshotdepth # pytype: disable=attribute-error
1547 is not None
1547 is not None
1548 ):
1548 ):
1549 dbg[
1549 dbg[
1550 'snapshot-depth'
1550 'snapshot-depth'
1551 ] = deltainfo.snapshotdepth # pytype: disable=attribute-error
1551 ] = deltainfo.snapshotdepth # pytype: disable=attribute-error
1552 else:
1552 else:
1553 dbg['snapshot-depth'] = 0
1553 dbg['snapshot-depth'] = 0
1554 self._dbg_process_data(dbg)
1554 self._dbg_process_data(dbg)
1555 return deltainfo
1555 return deltainfo
1556
1556
1557 def _one_dbg_data(self):
1557 def _one_dbg_data(self):
1558 return {
1558 return {
1559 'duration': None,
1559 'duration': None,
1560 'revision': None,
1560 'revision': None,
1561 'delta-base': None,
1561 'delta-base': None,
1562 'search_round_count': None,
1562 'search_round_count': None,
1563 'using-cached-base': None,
1563 'using-cached-base': None,
1564 'delta_try_count': None,
1564 'delta_try_count': None,
1565 'type': None,
1565 'type': None,
1566 'p1-chain-len': None,
1566 'p1-chain-len': None,
1567 'p2-chain-len': None,
1567 'p2-chain-len': None,
1568 'snapshot-depth': None,
1568 'snapshot-depth': None,
1569 'target-revlog': None,
1569 'target-revlog': None,
1570 }
1570 }
1571
1571
1572 def _dbg_process_data(self, dbg):
1572 def _dbg_process_data(self, dbg):
1573 if self._debug_info is not None:
1573 if self._debug_info is not None:
1574 self._debug_info.append(dbg)
1574 self._debug_info.append(dbg)
1575
1575
1576 if self._write_debug is not None:
1576 if self._write_debug is not None:
1577 msg = (
1577 msg = (
1578 b"DBG-DELTAS:"
1578 b"DBG-DELTAS:"
1579 b" %-12s"
1579 b" %-12s"
1580 b" rev=%d:"
1580 b" rev=%d:"
1581 b" delta-base=%d"
1581 b" delta-base=%d"
1582 b" is-cached=%d"
1582 b" is-cached=%d"
1583 b" - search-rounds=%d"
1583 b" - search-rounds=%d"
1584 b" try-count=%d"
1584 b" try-count=%d"
1585 b" - delta-type=%-6s"
1585 b" - delta-type=%-6s"
1586 b" snap-depth=%d"
1586 b" snap-depth=%d"
1587 b" - p1-chain-length=%d"
1587 b" - p1-chain-length=%d"
1588 b" p2-chain-length=%d"
1588 b" p2-chain-length=%d"
1589 b" - duration=%f"
1589 b" - duration=%f"
1590 b"\n"
1590 b"\n"
1591 )
1591 )
1592 msg %= (
1592 msg %= (
1593 dbg["target-revlog"],
1593 dbg["target-revlog"],
1594 dbg["revision"],
1594 dbg["revision"],
1595 dbg["delta-base"],
1595 dbg["delta-base"],
1596 dbg["using-cached-base"],
1596 dbg["using-cached-base"],
1597 dbg["search_round_count"],
1597 dbg["search_round_count"],
1598 dbg["delta_try_count"],
1598 dbg["delta_try_count"],
1599 dbg["type"],
1599 dbg["type"],
1600 dbg["snapshot-depth"],
1600 dbg["snapshot-depth"],
1601 dbg["p1-chain-len"],
1601 dbg["p1-chain-len"],
1602 dbg["p2-chain-len"],
1602 dbg["p2-chain-len"],
1603 dbg["duration"],
1603 dbg["duration"],
1604 )
1604 )
1605 self._write_debug(msg)
1605 self._write_debug(msg)
1606
1606
1607
1607
1608 def delta_compression(default_compression_header, deltainfo):
1608 def delta_compression(default_compression_header, deltainfo):
1609 """return (COMPRESSION_MODE, deltainfo)
1609 """return (COMPRESSION_MODE, deltainfo)
1610
1610
1611 used by revlog v2+ format to dispatch between PLAIN and DEFAULT
1611 used by revlog v2+ format to dispatch between PLAIN and DEFAULT
1612 compression.
1612 compression.
1613 """
1613 """
1614 h, d = deltainfo.data
1614 h, d = deltainfo.data
1615 compression_mode = COMP_MODE_INLINE
1615 compression_mode = COMP_MODE_INLINE
1616 if not h and not d:
1616 if not h and not d:
1617 # not data to store at all... declare them uncompressed
1617 # not data to store at all... declare them uncompressed
1618 compression_mode = COMP_MODE_PLAIN
1618 compression_mode = COMP_MODE_PLAIN
1619 elif not h:
1619 elif not h:
1620 t = d[0:1]
1620 t = d[0:1]
1621 if t == b'\0':
1621 if t == b'\0':
1622 compression_mode = COMP_MODE_PLAIN
1622 compression_mode = COMP_MODE_PLAIN
1623 elif t == default_compression_header:
1623 elif t == default_compression_header:
1624 compression_mode = COMP_MODE_DEFAULT
1624 compression_mode = COMP_MODE_DEFAULT
1625 elif h == b'u':
1625 elif h == b'u':
1626 # we have a more efficient way to declare uncompressed
1626 # we have a more efficient way to declare uncompressed
1627 h = b''
1627 h = b''
1628 compression_mode = COMP_MODE_PLAIN
1628 compression_mode = COMP_MODE_PLAIN
1629 deltainfo = drop_u_compression(deltainfo)
1629 deltainfo = drop_u_compression(deltainfo)
1630 return compression_mode, deltainfo
1630 return compression_mode, deltainfo
General Comments 0
You need to be logged in to leave comments. Login now