##// END OF EJS Templates
revlog: remove legacy usage of `_maxchainlen`...
marmoute -
r51945:e80e2d61 default
parent child Browse files
Show More
@@ -1,1624 +1,1630 b''
1 # revlogdeltas.py - Logic around delta computation for revlog
1 # revlogdeltas.py - Logic around delta computation for revlog
2 #
2 #
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 # Copyright 2018 Octobus <contact@octobus.net>
4 # Copyright 2018 Octobus <contact@octobus.net>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8 """Helper class to compute deltas stored inside revlogs"""
8 """Helper class to compute deltas stored inside revlogs"""
9
9
10
10
11 import collections
11 import collections
12 import struct
12 import struct
13
13
14 # import stuff from node for others to import from revlog
14 # import stuff from node for others to import from revlog
15 from ..node import nullrev
15 from ..node import nullrev
16 from ..i18n import _
16 from ..i18n import _
17
17
18 from .constants import (
18 from .constants import (
19 COMP_MODE_DEFAULT,
19 COMP_MODE_DEFAULT,
20 COMP_MODE_INLINE,
20 COMP_MODE_INLINE,
21 COMP_MODE_PLAIN,
21 COMP_MODE_PLAIN,
22 DELTA_BASE_REUSE_FORCE,
22 DELTA_BASE_REUSE_FORCE,
23 DELTA_BASE_REUSE_NO,
23 DELTA_BASE_REUSE_NO,
24 KIND_CHANGELOG,
24 KIND_CHANGELOG,
25 KIND_FILELOG,
25 KIND_FILELOG,
26 KIND_MANIFESTLOG,
26 KIND_MANIFESTLOG,
27 REVIDX_ISCENSORED,
27 REVIDX_ISCENSORED,
28 REVIDX_RAWTEXT_CHANGING_FLAGS,
28 REVIDX_RAWTEXT_CHANGING_FLAGS,
29 )
29 )
30
30
31 from ..thirdparty import attr
31 from ..thirdparty import attr
32
32
33 from .. import (
33 from .. import (
34 error,
34 error,
35 mdiff,
35 mdiff,
36 util,
36 util,
37 )
37 )
38
38
39 from . import flagutil
39 from . import flagutil
40
40
# maximum <delta-chain-data>/<revision-text-length> ratio
# (a chain whose cumulative delta data exceeds twice the size of the full
# text it reconstructs is considered not worth keeping)
LIMIT_DELTA2TEXT = 2
43
43
44
44
class _testrevlog:
    """minimalist fake revlog to use in doctests

    Mimics just enough of the revlog interface (``start``/``end``/``length``/
    ``issnapshot``/``__len__``) for the slicing helpers in this module.
    """

    def __init__(self, data, density=0.5, mingap=0, snapshot=()):
        """data is an list of revision payload boundaries

        Each entry is the cumulative end offset of a revision, so revision
        ``i`` spans ``data[i - 1] .. data[i]`` (revision 0 starts at 0).
        """
        # imported lazily to avoid an import cycle with the revlog module
        from .. import revlog

        self._data = data
        # legacy attribute names, kept alongside the config objects below
        self._srdensitythreshold = density
        self._srmingapsize = mingap
        # modern configuration objects mirroring the legacy attributes
        self.data_config = revlog.DataConfig()
        self.data_config.sr_density_threshold = density
        self.data_config.sr_min_gap_size = mingap
        self.delta_config = revlog.DeltaConfig()
        self.feature_config = revlog.FeatureConfig()
        self._snapshot = set(snapshot)
        # no index: forces callers through the pure-Python fallbacks
        self.index = None

    def start(self, rev):
        # byte offset where revision `rev`'s data begins
        if rev == nullrev:
            return 0
        if rev == 0:
            return 0
        return self._data[rev - 1]

    def end(self, rev):
        # byte offset just past revision `rev`'s data
        if rev == nullrev:
            return 0
        return self._data[rev]

    def length(self, rev):
        # stored size (in bytes) of revision `rev`
        return self.end(rev) - self.start(rev)

    def __len__(self):
        return len(self._data)

    def issnapshot(self, rev):
        # nullrev counts as a snapshot (it is an empty full text)
        if rev == nullrev:
            return True
        return rev in self._snapshot
def slicechunk(revlog, revs, targetsize=None):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `revlog._srdensitythreshold`. No gap smaller than
    `revlog._srmingapsize` is skipped.

    If `targetsize` is set, no chunk larger than `targetsize` will be yield.
    For consistency with other slicing choice, this limit won't go lower than
    `revlog._srmingapsize`.

    If individual revisions chunk are larger than this limit, they will still
    be raised individually.

    >>> data = [
    ...  5,  #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ]
    >>> revlog = _testrevlog(data, snapshot=range(16))

    >>> list(slicechunk(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(slicechunk(revlog, [0, 15]))
    [[0], [15]]
    >>> list(slicechunk(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]

    Slicing with a maximum chunk size
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
    [[0], [11], [13], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
    [[0], [11], [13, 15]]

    Slicing involving nullrev
    >>> list(slicechunk(revlog, [-1, 0, 11, 13, 15], targetsize=20))
    [[-1, 0], [11], [13, 15]]
    >>> list(slicechunk(revlog, [-1, 13, 15], targetsize=5))
    [[-1], [13], [15]]
    """
    if targetsize is not None:
        # never slice below the minimum gap size; smaller gaps are read
        # through anyway, so a smaller target would be pointless
        targetsize = max(targetsize, revlog._srmingapsize)
    # targetsize should not be specified when evaluating delta candidates:
    # * targetsize is used to ensure we stay within specification when reading,
    # prefer the index's native (C) density slicing when it exists; fall back
    # to the pure-Python implementation otherwise
    densityslicing = getattr(revlog.index, 'slicechunktodensity', None)
    if densityslicing is None:
        densityslicing = lambda x, y, z: _slicechunktodensity(revlog, x, y, z)
    for chunk in densityslicing(
        revs, revlog._srdensitythreshold, revlog._srmingapsize
    ):
        # second pass: enforce the maximum-read-size constraint per chunk
        for subchunk in _slicechunktosize(revlog, chunk, targetsize):
            yield subchunk
159
159
160
160
def _slicechunktosize(revlog, revs, targetsize=None):
    """slice revs to match the target size

    This is intended to be used on chunk that density slicing selected by that
    are still too large compared to the read garantee of revlog. This might
    happens when "minimal gap size" interrupted the slicing or when chain are
    built in a way that create large blocks next to each other.

    >>> data = [
    ...  3,  #0 (3)
    ...  5,  #1 (2)
    ...  6,  #2 (1)
    ...  8,  #3 (2)
    ...  8,  #4 (empty)
    ...  11, #5 (3)
    ...  12, #6 (1)
    ...  13, #7 (1)
    ...  14, #8 (1)
    ... ]

    == All snapshots cases ==
    >>> revlog = _testrevlog(data, snapshot=range(9))

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1, 2], [3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]

    == No Snapshot cases ==
    >>> revlog = _testrevlog(data)

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1], [2, 3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [4, 5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]

    == mixed case ==
    >>> revlog = _testrevlog(data, snapshot=[0, 1, 2])
    >>> list(_slicechunktosize(revlog, list(range(9)), 5))
    [[0, 1], [2], [3, 4, 5], [6, 7, 8]]
    """
    assert targetsize is None or 0 <= targetsize
    startdata = revlog.start(revs[0])
    enddata = revlog.end(revs[-1])
    fullspan = enddata - startdata
    # fast path: the whole chunk already fits within the target size
    if targetsize is None or fullspan <= targetsize:
        yield revs
        return

    startrevidx = 0
    endrevidx = 1
    iterrevs = enumerate(revs)
    next(iterrevs)  # skip first rev.
    # first step: get snapshots out of the way
    for idx, r in iterrevs:
        span = revlog.end(r) - startdata
        snapshot = revlog.issnapshot(r)
        if span <= targetsize and snapshot:
            # still within budget and still a snapshot: extend current chunk
            endrevidx = idx + 1
        else:
            # flush the accumulated chunk and start a new one at `r`
            chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
            if chunk:
                yield chunk
            startrevidx = idx
            startdata = revlog.start(r)
            endrevidx = idx + 1
        if not snapshot:
            # first non-snapshot revision ends the snapshot phase
            break

    # for the others, we use binary slicing to quickly converge toward valid
    # chunks (otherwise, we might end up looking for start/end of many
    # revisions). This logic is not looking for the perfect slicing point, it
    # focuses on quickly converging toward valid chunks.
    nbitem = len(revs)
    while (enddata - startdata) > targetsize:
        endrevidx = nbitem
        if nbitem - startrevidx <= 1:
            break  # protect against individual chunk larger than limit
        localenddata = revlog.end(revs[endrevidx - 1])
        span = localenddata - startdata
        # halve the candidate range until the span fits (or only one rev left)
        while span > targetsize:
            if endrevidx - startrevidx <= 1:
                break  # protect against individual chunk larger than limit
            endrevidx -= (endrevidx - startrevidx) // 2
            localenddata = revlog.end(revs[endrevidx - 1])
            span = localenddata - startdata
        chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
        if chunk:
            yield chunk
        startrevidx = endrevidx
        startdata = revlog.start(revs[startrevidx])

    # emit whatever remains after the last cut
    chunk = _trimchunk(revlog, revs, startrevidx)
    if chunk:
        yield chunk
314
314
315
315
def _slicechunktodensity(revlog, revs, targetdensity=0.5, mingapsize=0):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
    skipped.

    >>> revlog = _testrevlog([
    ...  5,  #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ])

    >>> list(_slicechunktodensity(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(_slicechunktodensity(revlog, [0, 15]))
    [[0], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           mingapsize=20))
    [[1, 2, 3, 5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95))
    [[1, 2], [5], [8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95, mingapsize=12))
    [[1, 2], [5, 8, 10, 11], [14]]
    """
    start = revlog.start
    length = revlog.length

    # zero or one revision: nothing to slice
    if len(revs) <= 1:
        yield revs
        return

    deltachainspan = segmentspan(revlog, revs)

    # the whole span is smaller than the minimum gap size: one read
    if deltachainspan < mingapsize:
        yield revs
        return

    readdata = deltachainspan
    chainpayload = sum(length(r) for r in revs)

    if deltachainspan:
        density = chainpayload / float(deltachainspan)
    else:
        density = 1.0

    # dense enough already: one read
    if density >= targetdensity:
        yield revs
        return

    # Collect the gaps between consecutive non-empty revisions as
    # (gapsize, index) pairs so they can later be sorted by size.
    gaps = []
    prevend = None
    for i, rev in enumerate(revs):
        revstart = start(rev)
        revlen = length(rev)

        # Skip empty revisions to form larger holes
        if revlen == 0:
            continue

        if prevend is not None:
            gapsize = revstart - prevend
            # only consider holes that are large enough
            if gapsize > mingapsize:
                gaps.append((gapsize, i))

        prevend = revstart + revlen
    # sort ascending so `pop()` below yields gaps from largest to smallest
    gaps.sort()

    # Collect the indices of the largest holes until the density is acceptable
    selected = []
    while gaps and density < targetdensity:
        gapsize, gapidx = gaps.pop()

        selected.append(gapidx)

        # skipping this gap shrinks the amount of data we would actually
        # read, which increases the effective density
        readdata -= gapsize
        if readdata > 0:
            density = chainpayload / float(readdata)
        else:
            density = 1.0
    selected.sort()

    # Cut the revs at collected indices
    previdx = 0
    for idx in selected:

        chunk = _trimchunk(revlog, revs, previdx, idx)
        if chunk:
            yield chunk

        previdx = idx

    # emit the final chunk after the last cut point
    chunk = _trimchunk(revlog, revs, previdx)
    if chunk:
        yield chunk
440
440
441
441
442 def _trimchunk(revlog, revs, startidx, endidx=None):
442 def _trimchunk(revlog, revs, startidx, endidx=None):
443 """returns revs[startidx:endidx] without empty trailing revs
443 """returns revs[startidx:endidx] without empty trailing revs
444
444
445 Doctest Setup
445 Doctest Setup
446 >>> revlog = _testrevlog([
446 >>> revlog = _testrevlog([
447 ... 5, #0
447 ... 5, #0
448 ... 10, #1
448 ... 10, #1
449 ... 12, #2
449 ... 12, #2
450 ... 12, #3 (empty)
450 ... 12, #3 (empty)
451 ... 17, #4
451 ... 17, #4
452 ... 21, #5
452 ... 21, #5
453 ... 21, #6 (empty)
453 ... 21, #6 (empty)
454 ... ])
454 ... ])
455
455
456 Contiguous cases:
456 Contiguous cases:
457 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
457 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
458 [0, 1, 2, 3, 4, 5]
458 [0, 1, 2, 3, 4, 5]
459 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
459 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
460 [0, 1, 2, 3, 4]
460 [0, 1, 2, 3, 4]
461 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
461 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
462 [0, 1, 2]
462 [0, 1, 2]
463 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
463 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
464 [2]
464 [2]
465 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
465 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
466 [3, 4, 5]
466 [3, 4, 5]
467 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
467 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
468 [3, 4]
468 [3, 4]
469
469
470 Discontiguous cases:
470 Discontiguous cases:
471 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
471 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
472 [1, 3, 5]
472 [1, 3, 5]
473 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
473 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
474 [1]
474 [1]
475 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
475 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
476 [3, 5]
476 [3, 5]
477 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
477 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
478 [3, 5]
478 [3, 5]
479 """
479 """
480 length = revlog.length
480 length = revlog.length
481
481
482 if endidx is None:
482 if endidx is None:
483 endidx = len(revs)
483 endidx = len(revs)
484
484
485 # If we have a non-emtpy delta candidate, there are nothing to trim
485 # If we have a non-emtpy delta candidate, there are nothing to trim
486 if revs[endidx - 1] < len(revlog):
486 if revs[endidx - 1] < len(revlog):
487 # Trim empty revs at the end, except the very first revision of a chain
487 # Trim empty revs at the end, except the very first revision of a chain
488 while (
488 while (
489 endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0
489 endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0
490 ):
490 ):
491 endidx -= 1
491 endidx -= 1
492
492
493 return revs[startidx:endidx]
493 return revs[startidx:endidx]
494
494
495
495
def segmentspan(revlog, revs):
    """Get the byte span of a segment of revisions

    revs is a sorted array of revision numbers

    >>> revlog = _testrevlog([
    ...  5,  #0
    ...  10, #1
    ...  12, #2
    ...  12, #3 (empty)
    ...  17, #4
    ... ])

    >>> segmentspan(revlog, [0, 1, 2, 3, 4])
    17
    >>> segmentspan(revlog, [0, 4])
    17
    >>> segmentspan(revlog, [3, 4])
    5
    >>> segmentspan(revlog, [1, 2, 3,])
    7
    >>> segmentspan(revlog, [1, 3])
    7
    """
    # an empty segment spans no bytes at all
    if not revs:
        return 0
    # distance from the first byte of the first revision to the last byte
    # of the last revision, gaps included
    span_start = revlog.start(revs[0])
    span_end = revlog.end(revs[-1])
    return span_end - span_start
524
524
525
525
def _textfromdelta(revlog, baserev, delta, p1, p2, flags, expectednode):
    """build full text from a (base, delta) pair and other metadata

    Applies ``delta`` on top of revision ``baserev``, validates the result
    against ``expectednode`` (with parents ``p1``/``p2``) when the flags
    require it, and returns the reconstructed full text.

    Raises StorageError if ``flags`` claim the revision is censored but the
    content does not look censored; re-raises CensoredNodeError when censored
    content is met without the censored flag being set.
    """
    # special case deltas which replace entire base; no need to decode
    # base revision. this neatly avoids censored bases, which throw when
    # they're decoded.
    hlen = struct.calcsize(b">lll")
    if delta[:hlen] == mdiff.replacediffheader(
        revlog.rawsize(baserev), len(delta) - hlen
    ):
        # the delta is a single hunk replacing the whole base: the payload
        # after the header IS the full text
        fulltext = delta[hlen:]
    else:
        # deltabase is rawtext before changed by flag processors, which is
        # equivalent to non-raw text
        basetext = revlog.revision(baserev)
        fulltext = mdiff.patch(basetext, delta)

    try:
        validatehash = flagutil.processflagsraw(revlog, fulltext, flags)
        if validatehash:
            revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
        if flags & REVIDX_ISCENSORED:
            # reaching this point means the hash checked out, so the data
            # cannot actually be censored: the flag is a lie
            raise error.StorageError(
                _(b'node %s is not censored') % expectednode
            )
    except error.CensoredNodeError:
        # must pass the censored index flag to add censored revisions
        if not flags & REVIDX_ISCENSORED:
            raise
    return fulltext
555
555
556
556
@attr.s(slots=True, frozen=True)
class _deltainfo:
    # distance from the delta base revision -- bounds the I/O needed to
    # restore this revision (see is_good_delta_info)
    distance = attr.ib()
    # size of the stored delta
    deltalen = attr.ib()
    # (compression-header, payload) pair -- header is b'u' for uncompressed
    # data (see drop_u_compression)
    data = attr.ib()
    # revision number of the delta base
    base = attr.ib()
    # first revision of the delta chain
    chainbase = attr.ib()
    # number of deltas in the chain
    chainlen = attr.ib()
    # cumulated size of the deltas to apply -- bounds the CPU needed to
    # restore this revision
    compresseddeltalen = attr.ib()
    # snapshot depth when this revision is an intermediate snapshot,
    # None otherwise
    snapshotdepth = attr.ib()
567
567
568
568
def drop_u_compression(delta):
    """turn into a "u" (no-compression) into no-compression without header

    This is useful for revlog format that has better compression method.

    ``delta`` must be a _deltainfo whose data carries the b'u'
    (uncompressed) header; returns a new _deltainfo one byte shorter with
    an empty compression header.
    """
    assert delta.data[0] == b'u', delta.data[0]
    return _deltainfo(
        delta.distance,
        delta.deltalen - 1,
        (b'', delta.data[1]),
        delta.base,
        delta.chainbase,
        delta.chainlen,
        delta.compresseddeltalen,
        delta.snapshotdepth,
    )
585
585
586
586
def is_good_delta_info(revlog, deltainfo, revinfo):
    """Returns True if the given delta is good. Good means that it is within
    the disk span, disk size, and chain length bounds that we know to be
    performant.

    ``deltainfo`` may be None (meaning "no delta candidate"), which is
    never good.  ``revinfo`` provides the target text length and any
    cached delta hint.
    """
    if deltainfo is None:
        return False

    # the DELTA_BASE_REUSE_FORCE case should have been taken care of sooner so
    # we should never end up asking such question. Adding the assert as a
    # safe-guard to detect anything that would be fishy in this regard.
    assert (
        revinfo.cachedelta is None
        or revinfo.cachedelta[2] != DELTA_BASE_REUSE_FORCE
        or not revlog.delta_config.general_delta
    )

    # - 'deltainfo.distance' is the distance from the base revision --
    #   bounding it limits the amount of I/O we need to do.
    # - 'deltainfo.compresseddeltalen' is the sum of the total size of
    #   deltas we need to apply -- bounding it limits the amount of CPU
    #   we consume.

    textlen = revinfo.textlen
    defaultmax = textlen * 4
    maxdist = revlog._maxdeltachainspan
    if not maxdist:
        maxdist = deltainfo.distance  # ensure the conditional pass
    maxdist = max(maxdist, defaultmax)

    # Bad delta from read span:
    #
    #   If the span of data read is larger than the maximum allowed.
    #
    #   In the sparse-revlog case, we rely on the associated "sparse reading"
    #   to avoid issue related to the span of data. In theory, it would be
    #   possible to build pathological revlog where delta pattern would lead
    #   to too many reads. However, they do not happen in practice at all. So
    #   we skip the span check entirely.
    if not revlog._sparserevlog and maxdist < deltainfo.distance:
        return False

    # Bad delta from new delta size:
    #
    #   If the delta size is larger than the target text, storing the
    #   delta will be inefficient.
    if textlen < deltainfo.deltalen:
        return False

    # Bad delta from cumulated payload size:
    #
    #   If the sum of delta get larger than K * target text length.
    if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
        return False

    # Bad delta from chain length:
    #
    #   If the number of delta in the chain gets too high.
    if (
        revlog.delta_config.max_chain_len
        and revlog.delta_config.max_chain_len < deltainfo.chainlen
    ):
        return False

    # bad delta from intermediate snapshot size limit
    #
    #   If an intermediate snapshot size is higher than the limit.  The
    #   limit exist to prevent endless chain of intermediate delta to be
    #   created.
    if (
        deltainfo.snapshotdepth is not None
        and (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen
    ):
        return False

    # bad delta if new intermediate snapshot is larger than the previous
    # snapshot
    if (
        deltainfo.snapshotdepth
        and revlog.length(deltainfo.base) < deltainfo.deltalen
    ):
        return False

    return True
667
670
668
671
# If a revision's full text is that much bigger than a base candidate full
# text's, it is very unlikely that it will produce a valid delta. We no longer
# consider these candidates.
# (i.e. a base is skipped when rawsize(base) < textlen // LIMIT_BASE2TEXT)
LIMIT_BASE2TEXT = 500
673
676
674
677
def _candidategroups(
    revlog,
    textlen,
    p1,
    p2,
    cachedelta,
    excluded_bases=None,
    target_rev=None,
    snapshot_cache=None,
):
    """Provides group of revision to be tested as delta base

    This top level function focus on emitting groups with unique and worthwhile
    content. See _raw_candidate_groups for details about the group order.

    This is a generator: each yielded value is a tuple of candidate base
    revisions (or None when exhausted), and the caller sends back the
    chosen "good" revision (or None) to drive refinement.
    """
    # should we try to build a delta?
    if not (len(revlog) and revlog._storedeltachains):
        yield None
        return

    if target_rev is None:
        target_rev = len(revlog)

    if not revlog.delta_config.general_delta:
        # before general delta, there is only one possible delta base
        yield (target_rev - 1,)
        yield None
        return

    # the DELTA_BASE_REUSE_FORCE case should have been taken care of sooner so
    # we should never end up asking such question. Adding the assert as a
    # safe-guard to detect anything that would be fishy in this regard.
    assert (
        cachedelta is None
        or cachedelta[2] != DELTA_BASE_REUSE_FORCE
        or not revlog.delta_config.general_delta
    )

    deltalength = revlog.length
    deltaparent = revlog.deltaparent
    sparse = revlog._sparserevlog
    good = None

    deltas_limit = textlen * LIMIT_DELTA2TEXT
    group_chunk_size = revlog._candidate_group_chunk_size

    tested = {nullrev}
    candidates = _refinedgroups(
        revlog,
        p1,
        p2,
        cachedelta,
        snapshot_cache=snapshot_cache,
    )
    while True:
        temptative = candidates.send(good)
        if temptative is None:
            break
        group = []
        for rev in temptative:
            # skip over empty delta (no need to include them in a chain)
            while not (rev == nullrev or rev in tested or deltalength(rev)):
                tested.add(rev)
                rev = deltaparent(rev)
            # no need to try a delta against nullrev, this will be done as a
            # last resort.
            if rev == nullrev:
                continue
            # filter out revision we tested already
            if rev in tested:
                continue

            # an higher authority deamed the base unworthy (e.g. censored)
            if excluded_bases is not None and rev in excluded_bases:
                tested.add(rev)
                continue
            # We are in some recomputation cases and that rev is too high in
            # the revlog
            if target_rev is not None and rev >= target_rev:
                tested.add(rev)
                continue
            # filter out delta base that will never produce good delta
            if deltas_limit < revlog.length(rev):
                tested.add(rev)
                continue
            if sparse and revlog.rawsize(rev) < (textlen // LIMIT_BASE2TEXT):
                tested.add(rev)
                continue
            # no delta for rawtext-changing revs (see "candelta" for why)
            if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
                tested.add(rev)
                continue

            # If we reach here, we are about to build and test a delta.
            # The delta building process will compute the chaininfo in all
            # case, since that computation is cached, it is fine to access it
            # here too.
            chainlen, chainsize = revlog._chaininfo(rev)
            # if chain will be too long, skip base
            if (
                revlog.delta_config.max_chain_len
                and chainlen >= revlog.delta_config.max_chain_len
            ):
                tested.add(rev)
                continue
            # if chain already have too much data, skip base
            if deltas_limit < chainsize:
                tested.add(rev)
                continue
            if sparse and revlog.upperboundcomp is not None:
                maxcomp = revlog.upperboundcomp
                basenotsnap = (p1, p2, nullrev)
                if rev not in basenotsnap and revlog.issnapshot(rev):
                    snapshotdepth = revlog.snapshotdepth(rev)
                    # If text is significantly larger than the base, we can
                    # expect the resulting delta to be proportional to the size
                    # difference
                    revsize = revlog.rawsize(rev)
                    rawsizedistance = max(textlen - revsize, 0)
                    # use an estimate of the compression upper bound.
                    lowestrealisticdeltalen = rawsizedistance // maxcomp

                    # check the absolute constraint on the delta size
                    snapshotlimit = textlen >> snapshotdepth
                    if snapshotlimit < lowestrealisticdeltalen:
                        # delta lower bound is larger than accepted upper bound
                        tested.add(rev)
                        continue

                    # check the relative constraint on the delta size
                    revlength = revlog.length(rev)
                    if revlength < lowestrealisticdeltalen:
                        # delta probable lower bound is larger than target base
                        tested.add(rev)
                        continue

            group.append(rev)
        if group:
            # When the size of the candidate group is big, it can result in a
            # quite significant performance impact. To reduce this, we can send
            # them in smaller batches until the new batch does not provide any
            # improvements.
            #
            # This might reduce the overall efficiency of the compression in
            # some corner cases, but that should also prevent very pathological
            # cases from being an issue. (eg. 20 000 candidates).
            #
            # XXX note that the ordering of the group becomes important as it
            # now impacts the final result. The current order is unprocessed
            # and can be improved.
            if group_chunk_size == 0:
                tested.update(group)
                good = yield tuple(group)
            else:
                prev_good = good
                for start in range(0, len(group), group_chunk_size):
                    sub_group = group[start : start + group_chunk_size]
                    tested.update(sub_group)
                    good = yield tuple(sub_group)
                    if prev_good == good:
                        break

    yield None
835
841
836
842
def _refinedgroups(revlog, p1, p2, cachedelta, snapshot_cache=None):
    """Generate refined groups of candidate delta bases.

    First tries the cached delta (when reuse is allowed), then the raw
    candidate groups from _rawgroups; in the sparse-revlog case the chosen
    snapshot is further refined down (toward its bases) and up (toward its
    children).  Like _candidategroups, the consumer sends back the chosen
    revision after each yield.
    """
    good = None
    # First we try to reuse a the delta contained in the bundle.
    # (or from the source revlog)
    #
    # This logic only applies to general delta repositories and can be disabled
    # through configuration. Disabling reuse source delta is useful when
    # we want to make sure we recomputed "optimal" deltas.
    debug_info = None
    if cachedelta is not None and cachedelta[2] > DELTA_BASE_REUSE_NO:
        # Assume what we received from the server is a good choice
        # build delta will reuse the cache
        if debug_info is not None:
            debug_info['cached-delta.tested'] += 1
        good = yield (cachedelta[0],)
        if good is not None:
            if debug_info is not None:
                debug_info['cached-delta.accepted'] += 1
            yield None
            return
    if snapshot_cache is None:
        snapshot_cache = SnapshotCache()
    groups = _rawgroups(
        revlog,
        p1,
        p2,
        cachedelta,
        snapshot_cache,
    )
    for candidates in groups:
        good = yield candidates
        if good is not None:
            break

    # If sparse revlog is enabled, we can try to refine the available deltas
    if not revlog._sparserevlog:
        yield None
        return

    # if we have a refinable value, try to refine it
    if good is not None and good not in (p1, p2) and revlog.issnapshot(good):
        # refine snapshot down
        previous = None
        while previous != good:
            previous = good
            base = revlog.deltaparent(good)
            if base == nullrev:
                break
            good = yield (base,)
        # refine snapshot up
        if not snapshot_cache.snapshots:
            snapshot_cache.update(revlog, good + 1)
        previous = None
        while good != previous:
            previous = good
            children = tuple(sorted(c for c in snapshot_cache.snapshots[good]))
            good = yield children

    if debug_info is not None:
        if good is None:
            debug_info['no-solution'] += 1

    yield None
900
906
901
907
def _rawgroups(revlog, p1, p2, cachedelta, snapshot_cache=None):
    """Provides group of revision to be tested as delta base

    This lower level function focus on emitting delta theorically interresting
    without looking it any practical details.

    The group order aims at providing fast or small candidates first.
    """
    # Why search for delta base if we cannot use a delta base ?
    assert revlog.delta_config.general_delta
    # also see issue6056
    sparse = revlog._sparserevlog
    curr = len(revlog)
    prev = curr - 1
    deltachain = lambda rev: revlog._deltachain(rev)[0]

    # exclude already lazy tested base if any
    parents = [p for p in (p1, p2) if p != nullrev]

    if not revlog._deltabothparents and len(parents) == 2:
        parents.sort()
        # To minimize the chance of having to build a fulltext,
        # pick first whichever parent is closest to us (max rev)
        yield (parents[1],)
        # then the other one (min rev) if the first did not fit
        yield (parents[0],)
    elif len(parents) > 0:
        # Test all parents (1 or 2), and keep the best candidate
        yield parents

    if sparse and parents:
        if snapshot_cache is None:
            # map: base-rev: [snapshot-revs]
            snapshot_cache = SnapshotCache()
        # See if we can use an existing snapshot in the parent chains to use as
        # a base for a new intermediate-snapshot
        #
        # search for snapshot in parents delta chain
        # map: snapshot-level: snapshot-rev
        parents_snaps = collections.defaultdict(set)
        candidate_chains = [deltachain(p) for p in parents]
        for chain in candidate_chains:
            for idx, s in enumerate(chain):
                if not revlog.issnapshot(s):
                    break
                parents_snaps[idx].add(s)
        snapfloor = min(parents_snaps[0]) + 1
        snapshot_cache.update(revlog, snapfloor)
        # search for the highest "unrelated" revision
        #
        # Adding snapshots used by "unrelated" revision increase the odd we
        # reuse an independant, yet better snapshot chain.
        #
        # XXX instead of building a set of revisions, we could lazily enumerate
        # over the chains. That would be more efficient, however we stick to
        # simple code for now.
        all_revs = set()
        for chain in candidate_chains:
            all_revs.update(chain)
        other = None
        for r in revlog.revs(prev, snapfloor):
            if r not in all_revs:
                other = r
                break
        if other is not None:
            # To avoid unfair competition, we won't use unrelated intermediate
            # snapshot that are deeper than the ones from the parent delta
            # chain.
            max_depth = max(parents_snaps.keys())
            chain = deltachain(other)
            for depth, s in enumerate(chain):
                if s < snapfloor:
                    continue
                if max_depth < depth:
                    break
                if not revlog.issnapshot(s):
                    break
                parents_snaps[depth].add(s)
        # Test them as possible intermediate snapshot base
        # We test them from highest to lowest level. High level one are more
        # likely to result in small delta
        floor = None
        for idx, snaps in sorted(parents_snaps.items(), reverse=True):
            siblings = set()
            for s in snaps:
                siblings.update(snapshot_cache.snapshots[s])
            # Before considering making a new intermediate snapshot, we check
            # if an existing snapshot, children of base we consider, would be
            # suitable.
            #
            # It give a change to reuse a delta chain "unrelated" to the
            # current revision instead of starting our own. Without such
            # re-use, topological branches would keep reopening new chains.
            # Creating more and more snapshot as the repository grow.

            if floor is not None:
                # We only do this for siblings created after the one in our
                # parent's delta chain. Those created before has less chances
                # to be valid base since our ancestors had to create a new
                # snapshot.
                siblings = [r for r in siblings if floor < r]
            yield tuple(sorted(siblings))
            # then test the base from our parent's delta chain.
            yield tuple(sorted(snaps))
            floor = min(snaps)
        # No suitable base found in the parent chain, search if any full
        # snapshots emitted since parent's base would be a suitable base for an
        # intermediate snapshot.
        #
        # It give a chance to reuse a delta chain unrelated to the current
        # revisions instead of starting our own. Without such re-use,
        # topological branches would keep reopening new full chains. Creating
        # more and more snapshot as the repository grow.
        full = [r for r in snapshot_cache.snapshots[nullrev] if snapfloor <= r]
        yield tuple(sorted(full))

    if not sparse:
        # other approach failed try against prev to hopefully save us a
        # fulltext.
        yield (prev,)
1022
1028
1023
1029
class SnapshotCache:
    """Track which revisions are snapshots, keyed by their delta parent.

    ``snapshots`` maps a base revision to the set of snapshot revisions
    stored as deltas against it. ``_start_rev``/``_end_rev`` record the
    inclusive range of revisions already scanned, so repeated calls to
    ``update`` only look at revisions outside the known window.
    """

    __slots__ = ('snapshots', '_start_rev', '_end_rev')

    def __init__(self):
        self.snapshots = collections.defaultdict(set)
        self._start_rev = None
        self._end_rev = None

    def update(self, revlog, start_rev=0):
        """find snapshots from start_rev to tip"""
        last = len(revlog) - 1
        if start_rev > last:
            # requested range is empty, nothing to scan
            return

        if self._start_rev is None:
            assert self._end_rev is None
            self._update(revlog, start_rev, last)
        elif not (self._start_rev <= start_rev and last <= self._end_rev):
            # extend the scanned window on whichever side falls outside it
            if start_rev < self._start_rev:
                self._update(revlog, start_rev, self._start_rev - 1)
            if self._end_rev < last:
                self._update(revlog, self._end_rev + 1, last)

        if self._start_rev is None:
            assert self._end_rev is None
            self._end_rev = last
            self._start_rev = start_rev
        else:
            self._start_rev = min(self._start_rev, start_rev)
            self._end_rev = max(self._end_rev, last)
        assert self._start_rev <= self._end_rev, (
            self._start_rev,
            self._end_rev,
        )

    def _update(self, revlog, start_rev, end_rev):
        """internal method that actually do update content"""
        # the requested range must lie entirely outside the known window
        assert self._start_rev is None or (
            start_rev < self._start_rev or start_rev > self._end_rev
        ), (self._start_rev, self._end_rev, start_rev, end_rev)
        assert self._start_rev is None or (
            end_rev < self._start_rev or end_rev > self._end_rev
        ), (self._start_rev, self._end_rev, start_rev, end_rev)
        mapping = self.snapshots
        index = revlog.index
        if hasattr(index, 'findsnapshots'):
            # fast path provided by the native index implementation
            index.findsnapshots(mapping, start_rev, end_rev)
        else:
            get_base = revlog.deltaparent
            is_snap = revlog.issnapshot
            for rev in revlog.revs(start_rev, end_rev):
                if is_snap(rev):
                    mapping[get_base(rev)].add(rev)
1078
1084
1079 class deltacomputer:
1085 class deltacomputer:
1080 def __init__(
1086 def __init__(
1081 self,
1087 self,
1082 revlog,
1088 revlog,
1083 write_debug=None,
1089 write_debug=None,
1084 debug_search=False,
1090 debug_search=False,
1085 debug_info=None,
1091 debug_info=None,
1086 ):
1092 ):
1087 self.revlog = revlog
1093 self.revlog = revlog
1088 self._write_debug = write_debug
1094 self._write_debug = write_debug
1089 if write_debug is None:
1095 if write_debug is None:
1090 self._debug_search = False
1096 self._debug_search = False
1091 else:
1097 else:
1092 self._debug_search = debug_search
1098 self._debug_search = debug_search
1093 self._debug_info = debug_info
1099 self._debug_info = debug_info
1094 self._snapshot_cache = SnapshotCache()
1100 self._snapshot_cache = SnapshotCache()
1095
1101
1096 @property
1102 @property
1097 def _gather_debug(self):
1103 def _gather_debug(self):
1098 return self._write_debug is not None or self._debug_info is not None
1104 return self._write_debug is not None or self._debug_info is not None
1099
1105
1100 def buildtext(self, revinfo):
1106 def buildtext(self, revinfo):
1101 """Builds a fulltext version of a revision
1107 """Builds a fulltext version of a revision
1102
1108
1103 revinfo: revisioninfo instance that contains all needed info
1109 revinfo: revisioninfo instance that contains all needed info
1104 """
1110 """
1105 btext = revinfo.btext
1111 btext = revinfo.btext
1106 if btext[0] is not None:
1112 if btext[0] is not None:
1107 return btext[0]
1113 return btext[0]
1108
1114
1109 revlog = self.revlog
1115 revlog = self.revlog
1110 cachedelta = revinfo.cachedelta
1116 cachedelta = revinfo.cachedelta
1111 baserev = cachedelta[0]
1117 baserev = cachedelta[0]
1112 delta = cachedelta[1]
1118 delta = cachedelta[1]
1113
1119
1114 fulltext = btext[0] = _textfromdelta(
1120 fulltext = btext[0] = _textfromdelta(
1115 revlog,
1121 revlog,
1116 baserev,
1122 baserev,
1117 delta,
1123 delta,
1118 revinfo.p1,
1124 revinfo.p1,
1119 revinfo.p2,
1125 revinfo.p2,
1120 revinfo.flags,
1126 revinfo.flags,
1121 revinfo.node,
1127 revinfo.node,
1122 )
1128 )
1123 return fulltext
1129 return fulltext
1124
1130
1125 def _builddeltadiff(self, base, revinfo):
1131 def _builddeltadiff(self, base, revinfo):
1126 revlog = self.revlog
1132 revlog = self.revlog
1127 t = self.buildtext(revinfo)
1133 t = self.buildtext(revinfo)
1128 if revlog.iscensored(base):
1134 if revlog.iscensored(base):
1129 # deltas based on a censored revision must replace the
1135 # deltas based on a censored revision must replace the
1130 # full content in one patch, so delta works everywhere
1136 # full content in one patch, so delta works everywhere
1131 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
1137 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
1132 delta = header + t
1138 delta = header + t
1133 else:
1139 else:
1134 ptext = revlog.rawdata(base)
1140 ptext = revlog.rawdata(base)
1135 delta = mdiff.textdiff(ptext, t)
1141 delta = mdiff.textdiff(ptext, t)
1136
1142
1137 return delta
1143 return delta
1138
1144
    def _builddeltainfo(self, revinfo, base, target_rev=None):
        """Build a ``_deltainfo`` describing ``revinfo`` deltaed against ``base``.

        Returns None when the delta is discarded early as obviously
        unprofitable (sparse-revlog snapshot size limits below).
        """
        # can we use the cached delta?
        revlog = self.revlog
        chainbase = revlog.chainbase(base)
        if revlog.delta_config.general_delta:
            deltabase = base
        else:
            # without general-delta, a stored delta may only target the
            # previous revision
            if target_rev is not None and base != target_rev - 1:
                msg = (
                    b'general delta cannot use delta for something else '
                    b'than `prev`: %d<-%d'
                )
                msg %= (base, target_rev)
                raise error.ProgrammingError(msg)
            deltabase = chainbase
        snapshotdepth = None
        if revlog._sparserevlog and deltabase == nullrev:
            # delta against nothing: a level-0 (full) snapshot
            snapshotdepth = 0
        elif revlog._sparserevlog and revlog.issnapshot(deltabase):
            # A delta chain should always be one full snapshot,
            # zero or more semi-snapshots, and zero or more deltas
            p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
            if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
                snapshotdepth = len(revlog._deltachain(deltabase)[0])
        delta = None
        if revinfo.cachedelta:
            cachebase = revinfo.cachedelta[0]
            # check if the diff still apply
            currentbase = cachebase
            # skip over empty intermediate revisions; their delta is a no-op,
            # so the cached diff still applies through them
            while (
                currentbase != nullrev
                and currentbase != base
                and self.revlog.length(currentbase) == 0
            ):
                currentbase = self.revlog.deltaparent(currentbase)
            if self.revlog._lazydelta and currentbase == base:
                delta = revinfo.cachedelta[1]
        if delta is None:
            delta = self._builddeltadiff(base, revinfo)
        if self._debug_search:
            msg = b"DBG-DELTAS-SEARCH: uncompressed-delta-size=%d\n"
            msg %= len(delta)
            self._write_debug(msg)
        # snapshotdept need to be neither None nor 0 level snapshot
        if revlog.upperboundcomp is not None and snapshotdepth:
            # estimate the smallest size this delta could compress to,
            # given the configured upper bound on compression ratio
            lowestrealisticdeltalen = len(delta) // revlog.upperboundcomp
            snapshotlimit = revinfo.textlen >> snapshotdepth
            if self._debug_search:
                msg = b"DBG-DELTAS-SEARCH: projected-lower-size=%d\n"
                msg %= lowestrealisticdeltalen
                self._write_debug(msg)
            if snapshotlimit < lowestrealisticdeltalen:
                # even in the best case the snapshot would be too large
                if self._debug_search:
                    msg = b"DBG-DELTAS-SEARCH: DISCARDED (snapshot limit)\n"
                    self._write_debug(msg)
                return None
            if revlog.length(base) < lowestrealisticdeltalen:
                # the delta cannot beat the size of its own base
                if self._debug_search:
                    msg = b"DBG-DELTAS-SEARCH: DISCARDED (prev size)\n"
                    self._write_debug(msg)
                return None
        header, data = revlog.compress(delta)
        deltalen = len(header) + len(data)
        offset = revlog.end(len(revlog) - 1)
        # distance from the start of the delta chain to the end of this delta
        dist = deltalen + offset - revlog.start(chainbase)
        chainlen, compresseddeltalen = revlog._chaininfo(base)
        chainlen += 1
        compresseddeltalen += deltalen

        return _deltainfo(
            dist,
            deltalen,
            (header, data),
            deltabase,
            chainbase,
            chainlen,
            compresseddeltalen,
            snapshotdepth,
        )
1218
1224
1219 def _fullsnapshotinfo(self, revinfo, curr):
1225 def _fullsnapshotinfo(self, revinfo, curr):
1220 rawtext = self.buildtext(revinfo)
1226 rawtext = self.buildtext(revinfo)
1221 data = self.revlog.compress(rawtext)
1227 data = self.revlog.compress(rawtext)
1222 compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
1228 compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
1223 deltabase = chainbase = curr
1229 deltabase = chainbase = curr
1224 snapshotdepth = 0
1230 snapshotdepth = 0
1225 chainlen = 1
1231 chainlen = 1
1226
1232
1227 return _deltainfo(
1233 return _deltainfo(
1228 dist,
1234 dist,
1229 deltalen,
1235 deltalen,
1230 data,
1236 data,
1231 deltabase,
1237 deltabase,
1232 chainbase,
1238 chainbase,
1233 chainlen,
1239 chainlen,
1234 compresseddeltalen,
1240 compresseddeltalen,
1235 snapshotdepth,
1241 snapshotdepth,
1236 )
1242 )
1237
1243
    def finddeltainfo(self, revinfo, excluded_bases=None, target_rev=None):
        """Find an acceptable delta against a candidate revision

        revinfo: information about the revision (instance of _revisioninfo)

        Returns the first acceptable candidate revision, as ordered by
        _candidategroups

        If no suitable deltabase is found, we return delta info for a full
        snapshot.

        `excluded_bases` is an optional set of revision that cannot be used as
        a delta base. Use this to recompute delta suitable in censor or strip
        context.
        """
        if target_rev is None:
            target_rev = len(self.revlog)

        gather_debug = self._gather_debug
        cachedelta = revinfo.cachedelta
        revlog = self.revlog
        p1r = p2r = None

        if excluded_bases is None:
            excluded_bases = set()

        # phase 0: optional collection of debug metadata about this search
        if gather_debug:
            start = util.timer()
            dbg = self._one_dbg_data()
            dbg['revision'] = target_rev
            target_revlog = b"UNKNOWN"
            target_type = self.revlog.target[0]
            target_key = self.revlog.target[1]
            if target_type == KIND_CHANGELOG:
                target_revlog = b'CHANGELOG:'
            elif target_type == KIND_MANIFESTLOG:
                target_revlog = b'MANIFESTLOG:'
                if target_key:
                    target_revlog += b'%s:' % target_key
            elif target_type == KIND_FILELOG:
                target_revlog = b'FILELOG:'
                if target_key:
                    target_revlog += b'%s:' % target_key
            dbg['target-revlog'] = target_revlog
            p1r = revlog.rev(revinfo.p1)
            p2r = revlog.rev(revinfo.p2)
            if p1r != nullrev:
                p1_chain_len = revlog._chaininfo(p1r)[0]
            else:
                p1_chain_len = -1
            if p2r != nullrev:
                p2_chain_len = revlog._chaininfo(p2r)[0]
            else:
                p2_chain_len = -1
            dbg['p1-chain-len'] = p1_chain_len
            dbg['p2-chain-len'] = p2_chain_len

        # 1) if the revision is empty, no amount of delta can beat it
        #
        # 2) no delta for flag processor revision (see "candelta" for why)
        # not calling candelta since only one revision needs test, also to
        # avoid overhead fetching flags again.
        if not revinfo.textlen or revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
            deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
            if gather_debug:
                end = util.timer()
                dbg['duration'] = end - start
                dbg[
                    'delta-base'
                ] = deltainfo.base  # pytype: disable=attribute-error
                dbg['search_round_count'] = 0
                dbg['using-cached-base'] = False
                dbg['delta_try_count'] = 0
                dbg['type'] = b"full"
                dbg['snapshot-depth'] = 0
                self._dbg_process_data(dbg)
            return deltainfo

        deltainfo = None

        # If this source delta are to be forcibly reuse, let us comply early.
        if (
            revlog.delta_config.general_delta
            and revinfo.cachedelta is not None
            and revinfo.cachedelta[2] == DELTA_BASE_REUSE_FORCE
        ):
            base = revinfo.cachedelta[0]
            if base == nullrev:
                dbg_type = b"full"
                deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
                if gather_debug:
                    snapshotdepth = 0
            elif base not in excluded_bases:
                delta = revinfo.cachedelta[1]
                header, data = revlog.compress(delta)
                deltalen = len(header) + len(data)
                if gather_debug:
                    offset = revlog.end(len(revlog) - 1)
                    chainbase = revlog.chainbase(base)
                    distance = deltalen + offset - revlog.start(chainbase)
                    chainlen, compresseddeltalen = revlog._chaininfo(base)
                    chainlen += 1
                    compresseddeltalen += deltalen
                    if base == p1r or base == p2r:
                        dbg_type = b"delta"
                        snapshotdepth = None
                    elif not revlog.issnapshot(base):
                        snapshotdepth = None
                    else:
                        dbg_type = b"snapshot"
                        snapshotdepth = revlog.snapshotdepth(base) + 1
                else:
                    # chain metadata is only needed for debug reporting
                    distance = None
                    chainbase = None
                    chainlen = None
                    compresseddeltalen = None
                    snapshotdepth = None
                deltainfo = _deltainfo(
                    distance=distance,
                    deltalen=deltalen,
                    data=(header, data),
                    base=base,
                    chainbase=chainbase,
                    chainlen=chainlen,
                    compresseddeltalen=compresseddeltalen,
                    snapshotdepth=snapshotdepth,
                )

            if deltainfo is not None:
                if gather_debug:
                    end = util.timer()
                    dbg['duration'] = end - start
                    dbg[
                        'delta-base'
                    ] = deltainfo.base  # pytype: disable=attribute-error
                    dbg['search_round_count'] = 0
                    dbg['using-cached-base'] = True
                    dbg['delta_try_count'] = 0
                    dbg['type'] = b"full"
                    if snapshotdepth is None:
                        dbg['snapshot-depth'] = 0
                    else:
                        dbg['snapshot-depth'] = snapshotdepth
                    self._dbg_process_data(dbg)
                return deltainfo

        # count the number of different delta we tried (for debug purpose)
        dbg_try_count = 0
        # count the number of "search round" we did. (for debug purpose)
        dbg_try_rounds = 0
        dbg_type = b'unknown'

        if p1r is None:
            p1r = revlog.rev(revinfo.p1)
            p2r = revlog.rev(revinfo.p2)

        if self._debug_search:
            msg = b"DBG-DELTAS-SEARCH: SEARCH rev=%d\n"
            msg %= target_rev
            self._write_debug(msg)

        # phase: iterate candidate groups; `groups` is a coroutine-style
        # generator that refines its candidates from the base we `send` back
        groups = _candidategroups(
            self.revlog,
            revinfo.textlen,
            p1r,
            p2r,
            cachedelta,
            excluded_bases,
            target_rev,
            snapshot_cache=self._snapshot_cache,
        )
        candidaterevs = next(groups)
        while candidaterevs is not None:
            dbg_try_rounds += 1
            if self._debug_search:
                prev = None
                if deltainfo is not None:
                    prev = deltainfo.base

                if (
                    cachedelta is not None
                    and len(candidaterevs) == 1
                    and cachedelta[0] in candidaterevs
                ):
                    round_type = b"cached-delta"
                elif p1r in candidaterevs or p2r in candidaterevs:
                    round_type = b"parents"
                elif prev is not None and all(c < prev for c in candidaterevs):
                    round_type = b"refine-down"
                elif prev is not None and all(c > prev for c in candidaterevs):
                    round_type = b"refine-up"
                else:
                    round_type = b"search-down"
                msg = b"DBG-DELTAS-SEARCH: ROUND #%d - %d candidates - %s\n"
                msg %= (dbg_try_rounds, len(candidaterevs), round_type)
                self._write_debug(msg)
            nominateddeltas = []
            if deltainfo is not None:
                if self._debug_search:
                    msg = (
                        b"DBG-DELTAS-SEARCH: CONTENDER: rev=%d - length=%d\n"
                    )
                    msg %= (deltainfo.base, deltainfo.deltalen)
                    self._write_debug(msg)
                # if we already found a good delta,
                # challenge it against refined candidates
                nominateddeltas.append(deltainfo)
            for candidaterev in candidaterevs:
                if self._debug_search:
                    msg = b"DBG-DELTAS-SEARCH: CANDIDATE: rev=%d\n"
                    msg %= candidaterev
                    self._write_debug(msg)
                    candidate_type = None
                    if candidaterev == p1r:
                        candidate_type = b"p1"
                    elif candidaterev == p2r:
                        candidate_type = b"p2"
                    elif self.revlog.issnapshot(candidaterev):
                        candidate_type = b"snapshot-%d"
                        candidate_type %= self.revlog.snapshotdepth(
                            candidaterev
                        )

                    if candidate_type is not None:
                        msg = b"DBG-DELTAS-SEARCH: type=%s\n"
                        msg %= candidate_type
                        self._write_debug(msg)
                    msg = b"DBG-DELTAS-SEARCH: size=%d\n"
                    msg %= self.revlog.length(candidaterev)
                    self._write_debug(msg)
                    msg = b"DBG-DELTAS-SEARCH: base=%d\n"
                    msg %= self.revlog.deltaparent(candidaterev)
                    self._write_debug(msg)

                dbg_try_count += 1

                if self._debug_search:
                    delta_start = util.timer()
                candidatedelta = self._builddeltainfo(
                    revinfo,
                    candidaterev,
                    target_rev=target_rev,
                )
                if self._debug_search:
                    delta_end = util.timer()
                    msg = b"DBG-DELTAS-SEARCH: delta-search-time=%f\n"
                    msg %= delta_end - delta_start
                    self._write_debug(msg)
                if candidatedelta is not None:
                    if is_good_delta_info(self.revlog, candidatedelta, revinfo):
                        if self._debug_search:
                            msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (GOOD)\n"
                            msg %= candidatedelta.deltalen
                            self._write_debug(msg)
                        nominateddeltas.append(candidatedelta)
                    elif self._debug_search:
                        msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (BAD)\n"
                        msg %= candidatedelta.deltalen
                        self._write_debug(msg)
                elif self._debug_search:
                    msg = b"DBG-DELTAS-SEARCH: NO-DELTA\n"
                    self._write_debug(msg)
            if nominateddeltas:
                # keep the smallest delta found in this round
                deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
            if deltainfo is not None:
                candidaterevs = groups.send(deltainfo.base)
            else:
                candidaterevs = next(groups)

        # phase: fall back to a full snapshot when no delta was acceptable
        if deltainfo is None:
            dbg_type = b"full"
            deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
        elif deltainfo.snapshotdepth:  # pytype: disable=attribute-error
            dbg_type = b"snapshot"
        else:
            dbg_type = b"delta"

        if gather_debug:
            end = util.timer()
            if dbg_type == b'full':
                used_cached = (
                    cachedelta is not None
                    and dbg_try_rounds == 0
                    and dbg_try_count == 0
                    and cachedelta[0] == nullrev
                )
            else:
                used_cached = (
                    cachedelta is not None
                    and dbg_try_rounds == 1
                    and dbg_try_count == 1
                    and deltainfo.base == cachedelta[0]
                )
            dbg['duration'] = end - start
            dbg[
                'delta-base'
            ] = deltainfo.base  # pytype: disable=attribute-error
            dbg['search_round_count'] = dbg_try_rounds
            dbg['using-cached-base'] = used_cached
            dbg['delta_try_count'] = dbg_try_count
            dbg['type'] = dbg_type
            if (
                deltainfo.snapshotdepth  # pytype: disable=attribute-error
                is not None
            ):
                dbg[
                    'snapshot-depth'
                ] = deltainfo.snapshotdepth  # pytype: disable=attribute-error
            else:
                dbg['snapshot-depth'] = 0
            self._dbg_process_data(dbg)
        return deltainfo
1550
1556
1551 def _one_dbg_data(self):
1557 def _one_dbg_data(self):
1552 return {
1558 return {
1553 'duration': None,
1559 'duration': None,
1554 'revision': None,
1560 'revision': None,
1555 'delta-base': None,
1561 'delta-base': None,
1556 'search_round_count': None,
1562 'search_round_count': None,
1557 'using-cached-base': None,
1563 'using-cached-base': None,
1558 'delta_try_count': None,
1564 'delta_try_count': None,
1559 'type': None,
1565 'type': None,
1560 'p1-chain-len': None,
1566 'p1-chain-len': None,
1561 'p2-chain-len': None,
1567 'p2-chain-len': None,
1562 'snapshot-depth': None,
1568 'snapshot-depth': None,
1563 'target-revlog': None,
1569 'target-revlog': None,
1564 }
1570 }
1565
1571
1566 def _dbg_process_data(self, dbg):
1572 def _dbg_process_data(self, dbg):
1567 if self._debug_info is not None:
1573 if self._debug_info is not None:
1568 self._debug_info.append(dbg)
1574 self._debug_info.append(dbg)
1569
1575
1570 if self._write_debug is not None:
1576 if self._write_debug is not None:
1571 msg = (
1577 msg = (
1572 b"DBG-DELTAS:"
1578 b"DBG-DELTAS:"
1573 b" %-12s"
1579 b" %-12s"
1574 b" rev=%d:"
1580 b" rev=%d:"
1575 b" delta-base=%d"
1581 b" delta-base=%d"
1576 b" is-cached=%d"
1582 b" is-cached=%d"
1577 b" - search-rounds=%d"
1583 b" - search-rounds=%d"
1578 b" try-count=%d"
1584 b" try-count=%d"
1579 b" - delta-type=%-6s"
1585 b" - delta-type=%-6s"
1580 b" snap-depth=%d"
1586 b" snap-depth=%d"
1581 b" - p1-chain-length=%d"
1587 b" - p1-chain-length=%d"
1582 b" p2-chain-length=%d"
1588 b" p2-chain-length=%d"
1583 b" - duration=%f"
1589 b" - duration=%f"
1584 b"\n"
1590 b"\n"
1585 )
1591 )
1586 msg %= (
1592 msg %= (
1587 dbg["target-revlog"],
1593 dbg["target-revlog"],
1588 dbg["revision"],
1594 dbg["revision"],
1589 dbg["delta-base"],
1595 dbg["delta-base"],
1590 dbg["using-cached-base"],
1596 dbg["using-cached-base"],
1591 dbg["search_round_count"],
1597 dbg["search_round_count"],
1592 dbg["delta_try_count"],
1598 dbg["delta_try_count"],
1593 dbg["type"],
1599 dbg["type"],
1594 dbg["snapshot-depth"],
1600 dbg["snapshot-depth"],
1595 dbg["p1-chain-len"],
1601 dbg["p1-chain-len"],
1596 dbg["p2-chain-len"],
1602 dbg["p2-chain-len"],
1597 dbg["duration"],
1603 dbg["duration"],
1598 )
1604 )
1599 self._write_debug(msg)
1605 self._write_debug(msg)
1600
1606
1601
1607
def delta_compression(default_compression_header, deltainfo):
    """return (COMPRESSION_MODE, deltainfo)

    used by revlog v2+ format to dispatch between PLAIN and DEFAULT
    compression.
    """
    header, payload = deltainfo.data
    if not header and not payload:
        # no data to store at all... declare it uncompressed
        return COMP_MODE_PLAIN, deltainfo
    if not header:
        # headerless payload: inspect the leading byte to classify it
        leading = payload[0:1]
        if leading == b'\0':
            return COMP_MODE_PLAIN, deltainfo
        if leading == default_compression_header:
            return COMP_MODE_DEFAULT, deltainfo
        return COMP_MODE_INLINE, deltainfo
    if header == b'u':
        # we have a more efficient way to declare uncompressed
        return COMP_MODE_PLAIN, drop_u_compression(deltainfo)
    return COMP_MODE_INLINE, deltainfo
General Comments 0
You need to be logged in to leave comments. Login now