##// END OF EJS Templates
delta-find: pass the full deltainfo to the _DeltaSearch class...
marmoute -
r52253:99869dcf default
parent child Browse files
Show More
@@ -1,1887 +1,1883 b''
1 # revlogdeltas.py - Logic around delta computation for revlog
1 # revlogdeltas.py - Logic around delta computation for revlog
2 #
2 #
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 # Copyright 2018 Octobus <contact@octobus.net>
4 # Copyright 2018 Octobus <contact@octobus.net>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8 """Helper class to compute deltas stored inside revlogs"""
8 """Helper class to compute deltas stored inside revlogs"""
9
9
10
10
11 import abc
11 import abc
12 import collections
12 import collections
13 import struct
13 import struct
14
14
15 # import stuff from node for others to import from revlog
15 # import stuff from node for others to import from revlog
16 from ..node import nullrev
16 from ..node import nullrev
17 from ..i18n import _
17 from ..i18n import _
18
18
19 from .constants import (
19 from .constants import (
20 COMP_MODE_DEFAULT,
20 COMP_MODE_DEFAULT,
21 COMP_MODE_INLINE,
21 COMP_MODE_INLINE,
22 COMP_MODE_PLAIN,
22 COMP_MODE_PLAIN,
23 DELTA_BASE_REUSE_FORCE,
23 DELTA_BASE_REUSE_FORCE,
24 DELTA_BASE_REUSE_NO,
24 DELTA_BASE_REUSE_NO,
25 KIND_CHANGELOG,
25 KIND_CHANGELOG,
26 KIND_FILELOG,
26 KIND_FILELOG,
27 KIND_MANIFESTLOG,
27 KIND_MANIFESTLOG,
28 REVIDX_ISCENSORED,
28 REVIDX_ISCENSORED,
29 REVIDX_RAWTEXT_CHANGING_FLAGS,
29 REVIDX_RAWTEXT_CHANGING_FLAGS,
30 )
30 )
31
31
32 from ..thirdparty import attr
32 from ..thirdparty import attr
33
33
34 from .. import (
34 from .. import (
35 error,
35 error,
36 mdiff,
36 mdiff,
37 util,
37 util,
38 )
38 )
39
39
40 from . import flagutil
40 from . import flagutil
41
41
42 # maximum <delta-chain-data>/<revision-text-length> ratio
42 # maximum <delta-chain-data>/<revision-text-length> ratio
43 LIMIT_DELTA2TEXT = 2
43 LIMIT_DELTA2TEXT = 2
44
44
45
45
class _testrevlog:
    """minimalist fake revlog to use in doctests"""

    def __init__(self, data, density=0.5, mingap=0, snapshot=()):
        """data is an list of revision payload boundaries"""
        from .. import revlog

        self._data = data
        self.data_config = revlog.DataConfig()
        self.data_config.sr_density_threshold = density
        self.data_config.sr_min_gap_size = mingap
        self.delta_config = revlog.DeltaConfig()
        self.feature_config = revlog.FeatureConfig()
        self._snapshot = set(snapshot)
        self.index = None

    def start(self, rev):
        # nullrev and the very first revision both start at offset zero
        if rev in (nullrev, 0):
            return 0
        return self._data[rev - 1]

    def end(self, rev):
        return 0 if rev == nullrev else self._data[rev]

    def length(self, rev):
        # payload size is the distance between the rev's boundaries
        return self.end(rev) - self.start(rev)

    def __len__(self):
        return len(self._data)

    def issnapshot(self, rev):
        # nullrev always counts as a snapshot
        return rev == nullrev or rev in self._snapshot
84
84
85
85
def slicechunk(revlog, revs, targetsize=None):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `revlog.data_config.sr_density_threshold`. No gap smaller
    than `revlog.data_config.sr_min_gap_size` is skipped.

    If `targetsize` is set, no chunk larger than `targetsize` will be yield.
    For consistency with other slicing choice, this limit won't go lower than
    `revlog.data_config.sr_min_gap_size`.

    If individual revisions chunk are larger than this limit, they will still
    be raised individually.

    >>> data = [
    ...     5,  #00 (5)
    ...     10, #01 (5)
    ...     12, #02 (2)
    ...     12, #03 (empty)
    ...     27, #04 (15)
    ...     31, #05 (4)
    ...     31, #06 (empty)
    ...     42, #07 (11)
    ...     47, #08 (5)
    ...     47, #09 (empty)
    ...     48, #10 (1)
    ...     51, #11 (3)
    ...     74, #12 (23)
    ...     85, #13 (11)
    ...     86, #14 (1)
    ...     91, #15 (5)
    ... ]
    >>> revlog = _testrevlog(data, snapshot=range(16))

    >>> list(slicechunk(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(slicechunk(revlog, [0, 15]))
    [[0], [15]]
    >>> list(slicechunk(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]

    Slicing with a maximum chunk size
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
    [[0], [11], [13], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
    [[0], [11], [13, 15]]

    Slicing involving nullrev
    >>> list(slicechunk(revlog, [-1, 0, 11, 13, 15], targetsize=20))
    [[-1, 0], [11], [13, 15]]
    >>> list(slicechunk(revlog, [-1, 13, 15], targetsize=5))
    [[-1], [13], [15]]
    """
    min_gap = revlog.data_config.sr_min_gap_size
    if targetsize is not None:
        targetsize = max(targetsize, min_gap)
    # targetsize should not be specified when evaluating delta candidates:
    # * targetsize is used to ensure we stay within specification when reading,
    densityslicing = getattr(revlog.index, 'slicechunktodensity', None)
    if densityslicing is None:
        # no native implementation available; fall back to the pure-python one
        def densityslicing(rs, density, gap):
            return _slicechunktodensity(revlog, rs, density, gap)

    density_chunks = densityslicing(
        revs,
        revlog.data_config.sr_density_threshold,
        min_gap,
    )
    for chunk in density_chunks:
        # second pass: enforce the maximum chunk size, if any
        yield from _slicechunktosize(revlog, chunk, targetsize)
160
160
161
161
def _slicechunktosize(revlog, revs, targetsize=None):
    """slice revs to match the target size

    This is intended to be used on chunk that density slicing selected by that
    are still too large compared to the read garantee of revlog. This might
    happens when "minimal gap size" interrupted the slicing or when chain are
    built in a way that create large blocks next to each other.

    >>> data = [
    ...     3,  #0 (3)
    ...     5,  #1 (2)
    ...     6,  #2 (1)
    ...     8,  #3 (2)
    ...     8,  #4 (empty)
    ...     11, #5 (3)
    ...     12, #6 (1)
    ...     13, #7 (1)
    ...     14, #8 (1)
    ... ]

    == All snapshots cases ==
    >>> revlog = _testrevlog(data, snapshot=range(9))

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1, 2], [3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]

    == No Snapshot cases ==
    >>> revlog = _testrevlog(data)

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1], [2, 3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [4, 5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]

    == mixed case ==
    >>> revlog = _testrevlog(data, snapshot=[0, 1, 2])
    >>> list(_slicechunktosize(revlog, list(range(9)), 5))
    [[0, 1], [2], [3, 4, 5], [6, 7, 8]]
    """
    assert targetsize is None or 0 <= targetsize
    startdata = revlog.start(revs[0])
    enddata = revlog.end(revs[-1])
    fullspan = enddata - startdata
    # whole span already within budget (or no budget at all): nothing to do
    if targetsize is None or fullspan <= targetsize:
        yield revs
        return

    startrevidx = 0
    endrevidx = 1
    iterrevs = enumerate(revs)
    next(iterrevs)  # skip first rev.
    # first step: get snapshots out of the way
    for idx, r in iterrevs:
        span = revlog.end(r) - startdata
        snapshot = revlog.issnapshot(r)
        if span <= targetsize and snapshot:
            # still a snapshot and still within budget: grow the chunk
            endrevidx = idx + 1
        else:
            # budget exceeded (or non-snapshot reached): flush current chunk
            # and start a new one at this revision
            chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
            if chunk:
                yield chunk
            startrevidx = idx
            startdata = revlog.start(r)
            endrevidx = idx + 1
        if not snapshot:
            # leave the snapshot-only phase; the rest is handled below
            break

    # for the others, we use binary slicing to quickly converge toward valid
    # chunks (otherwise, we might end up looking for start/end of many
    # revisions). This logic is not looking for the perfect slicing point, it
    # focuses on quickly converging toward valid chunks.
    nbitem = len(revs)
    while (enddata - startdata) > targetsize:
        endrevidx = nbitem
        if nbitem - startrevidx <= 1:
            break  # protect against individual chunk larger than limit
        localenddata = revlog.end(revs[endrevidx - 1])
        span = localenddata - startdata
        # halve the candidate range until its byte span fits the budget
        while span > targetsize:
            if endrevidx - startrevidx <= 1:
                break  # protect against individual chunk larger than limit
            endrevidx -= (endrevidx - startrevidx) // 2
            localenddata = revlog.end(revs[endrevidx - 1])
            span = localenddata - startdata
        chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
        if chunk:
            yield chunk
        startrevidx = endrevidx
        startdata = revlog.start(revs[startrevidx])

    # emit whatever remains after the last cut
    chunk = _trimchunk(revlog, revs, startrevidx)
    if chunk:
        yield chunk
315
315
316
316
def _slicechunktodensity(revlog, revs, targetdensity=0.5, mingapsize=0):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
    skipped.

    >>> revlog = _testrevlog([
    ...     5,  #00 (5)
    ...     10, #01 (5)
    ...     12, #02 (2)
    ...     12, #03 (empty)
    ...     27, #04 (15)
    ...     31, #05 (4)
    ...     31, #06 (empty)
    ...     42, #07 (11)
    ...     47, #08 (5)
    ...     47, #09 (empty)
    ...     48, #10 (1)
    ...     51, #11 (3)
    ...     74, #12 (23)
    ...     85, #13 (11)
    ...     86, #14 (1)
    ...     91, #15 (5)
    ... ])

    >>> list(_slicechunktodensity(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(_slicechunktodensity(revlog, [0, 15]))
    [[0], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           mingapsize=20))
    [[1, 2, 3, 5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95))
    [[1, 2], [5], [8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95, mingapsize=12))
    [[1, 2], [5, 8, 10, 11], [14]]
    """
    start = revlog.start
    length = revlog.length

    # a single revision is trivially "dense enough"
    if len(revs) <= 1:
        yield revs
        return

    deltachainspan = segmentspan(revlog, revs)

    # span smaller than the minimum gap: not worth slicing at all
    if deltachainspan < mingapsize:
        yield revs
        return

    readdata = deltachainspan
    chainpayload = sum(length(r) for r in revs)

    if deltachainspan:
        density = chainpayload / float(deltachainspan)
    else:
        density = 1.0

    if density >= targetdensity:
        yield revs
        return

    # Collect the gaps between consecutive non-empty revisions as
    # (size, index) pairs.  The list is sorted ascending below, so popping
    # from its end yields the largest gap first.
    gaps = []
    prevend = None
    for i, rev in enumerate(revs):
        revstart = start(rev)
        revlen = length(rev)

        # Skip empty revisions to form larger holes
        if revlen == 0:
            continue

        if prevend is not None:
            gapsize = revstart - prevend
            # only consider holes that are large enough
            if gapsize > mingapsize:
                gaps.append((gapsize, i))

        prevend = revstart + revlen
    # sort the gaps to pop them from largest to small
    gaps.sort()

    # Collect the indices of the largest holes until the density is acceptable
    selected = []
    while gaps and density < targetdensity:
        gapsize, gapidx = gaps.pop()

        selected.append(gapidx)

        # skipping this gap means those bytes no longer need to be read,
        # which raises the density of what remains
        readdata -= gapsize
        if readdata > 0:
            density = chainpayload / float(readdata)
        else:
            density = 1.0
    selected.sort()

    # Cut the revs at collected indices
    previdx = 0
    for idx in selected:

        chunk = _trimchunk(revlog, revs, previdx, idx)
        if chunk:
            yield chunk

        previdx = idx

    # emit the final chunk after the last cut point
    chunk = _trimchunk(revlog, revs, previdx)
    if chunk:
        yield chunk
441
441
442
442
443 def _trimchunk(revlog, revs, startidx, endidx=None):
443 def _trimchunk(revlog, revs, startidx, endidx=None):
444 """returns revs[startidx:endidx] without empty trailing revs
444 """returns revs[startidx:endidx] without empty trailing revs
445
445
446 Doctest Setup
446 Doctest Setup
447 >>> revlog = _testrevlog([
447 >>> revlog = _testrevlog([
448 ... 5, #0
448 ... 5, #0
449 ... 10, #1
449 ... 10, #1
450 ... 12, #2
450 ... 12, #2
451 ... 12, #3 (empty)
451 ... 12, #3 (empty)
452 ... 17, #4
452 ... 17, #4
453 ... 21, #5
453 ... 21, #5
454 ... 21, #6 (empty)
454 ... 21, #6 (empty)
455 ... ])
455 ... ])
456
456
457 Contiguous cases:
457 Contiguous cases:
458 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
458 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
459 [0, 1, 2, 3, 4, 5]
459 [0, 1, 2, 3, 4, 5]
460 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
460 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
461 [0, 1, 2, 3, 4]
461 [0, 1, 2, 3, 4]
462 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
462 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
463 [0, 1, 2]
463 [0, 1, 2]
464 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
464 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
465 [2]
465 [2]
466 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
466 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
467 [3, 4, 5]
467 [3, 4, 5]
468 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
468 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
469 [3, 4]
469 [3, 4]
470
470
471 Discontiguous cases:
471 Discontiguous cases:
472 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
472 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
473 [1, 3, 5]
473 [1, 3, 5]
474 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
474 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
475 [1]
475 [1]
476 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
476 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
477 [3, 5]
477 [3, 5]
478 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
478 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
479 [3, 5]
479 [3, 5]
480 """
480 """
481 length = revlog.length
481 length = revlog.length
482
482
483 if endidx is None:
483 if endidx is None:
484 endidx = len(revs)
484 endidx = len(revs)
485
485
486 # If we have a non-emtpy delta candidate, there are nothing to trim
486 # If we have a non-emtpy delta candidate, there are nothing to trim
487 if revs[endidx - 1] < len(revlog):
487 if revs[endidx - 1] < len(revlog):
488 # Trim empty revs at the end, except the very first revision of a chain
488 # Trim empty revs at the end, except the very first revision of a chain
489 while (
489 while (
490 endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0
490 endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0
491 ):
491 ):
492 endidx -= 1
492 endidx -= 1
493
493
494 return revs[startidx:endidx]
494 return revs[startidx:endidx]
495
495
496
496
def segmentspan(revlog, revs):
    """Get the byte span of a segment of revisions

    revs is a sorted array of revision numbers

    >>> revlog = _testrevlog([
    ...     5,  #0
    ...     10, #1
    ...     12, #2
    ...     12, #3 (empty)
    ...     17, #4
    ... ])

    >>> segmentspan(revlog, [0, 1, 2, 3, 4])
    17
    >>> segmentspan(revlog, [0, 4])
    17
    >>> segmentspan(revlog, [3, 4])
    5
    >>> segmentspan(revlog, [1, 2, 3,])
    7
    >>> segmentspan(revlog, [1, 3])
    7
    """
    # an empty segment spans no bytes at all
    if not revs:
        return 0
    first_byte = revlog.start(revs[0])
    last_byte = revlog.end(revs[-1])
    return last_byte - first_byte
525
525
526
526
def _textfromdelta(revlog, baserev, delta, p1, p2, flags, expectednode):
    """build full text from a (base, delta) pair and other metadata"""
    # A delta that replaces the entire base can be applied without decoding
    # the base revision at all.  This also neatly sidesteps censored bases,
    # which raise when they are decoded.
    header_size = struct.calcsize(b">lll")
    replace_header = mdiff.replacediffheader(
        revlog.rawsize(baserev), len(delta) - header_size
    )
    if delta[:header_size] == replace_header:
        fulltext = delta[header_size:]
    else:
        # deltabase is rawtext before changed by flag processors, which is
        # equivalent to non-raw text
        basetext = revlog.revision(baserev)
        fulltext = mdiff.patch(basetext, delta)

    try:
        if flagutil.processflagsraw(revlog, fulltext, flags):
            revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
        if flags & REVIDX_ISCENSORED:
            raise error.StorageError(
                _(b'node %s is not censored') % expectednode
            )
    except error.CensoredNodeError:
        # must pass the censored index flag to add censored revisions
        if not flags & REVIDX_ISCENSORED:
            raise
    return fulltext
556
556
557
557
558 @attr.s(slots=True, frozen=True)
558 @attr.s(slots=True, frozen=True)
559 class _deltainfo:
559 class _deltainfo:
560 distance = attr.ib()
560 distance = attr.ib()
561 deltalen = attr.ib()
561 deltalen = attr.ib()
562 data = attr.ib()
562 data = attr.ib()
563 base = attr.ib()
563 base = attr.ib()
564 chainbase = attr.ib()
564 chainbase = attr.ib()
565 chainlen = attr.ib()
565 chainlen = attr.ib()
566 compresseddeltalen = attr.ib()
566 compresseddeltalen = attr.ib()
567 snapshotdepth = attr.ib()
567 snapshotdepth = attr.ib()
568
568
569
569
570 def drop_u_compression(delta):
570 def drop_u_compression(delta):
571 """turn into a "u" (no-compression) into no-compression without header
571 """turn into a "u" (no-compression) into no-compression without header
572
572
573 This is useful for revlog format that has better compression method.
573 This is useful for revlog format that has better compression method.
574 """
574 """
575 assert delta.data[0] == b'u', delta.data[0]
575 assert delta.data[0] == b'u', delta.data[0]
576 return _deltainfo(
576 return _deltainfo(
577 delta.distance,
577 delta.distance,
578 delta.deltalen - 1,
578 delta.deltalen - 1,
579 (b'', delta.data[1]),
579 (b'', delta.data[1]),
580 delta.base,
580 delta.base,
581 delta.chainbase,
581 delta.chainbase,
582 delta.chainlen,
582 delta.chainlen,
583 delta.compresseddeltalen,
583 delta.compresseddeltalen,
584 delta.snapshotdepth,
584 delta.snapshotdepth,
585 )
585 )
586
586
587
587
588 # If a revision's full text is that much bigger than a base candidate full
588 # If a revision's full text is that much bigger than a base candidate full
589 # text's, it is very unlikely that it will produce a valid delta. We no longer
589 # text's, it is very unlikely that it will produce a valid delta. We no longer
590 # consider these candidates.
590 # consider these candidates.
591 LIMIT_BASE2TEXT = 500
591 LIMIT_BASE2TEXT = 500
592
592
593 ### stage of the search, used for debug and to select and to adjust some logic.
593 ### stage of the search, used for debug and to select and to adjust some logic.
594 # initial stage, next step is unknown
594 # initial stage, next step is unknown
595 _STAGE_UNSPECIFIED = "unspecified"
595 _STAGE_UNSPECIFIED = "unspecified"
596 # trying the cached delta
596 # trying the cached delta
597 _STAGE_CACHED = "cached"
597 _STAGE_CACHED = "cached"
598 # trying delta based on parents
598 # trying delta based on parents
599 _STAGE_PARENTS = "parents"
599 _STAGE_PARENTS = "parents"
600 # trying to build a valid snapshot of any level
600 # trying to build a valid snapshot of any level
601 _STAGE_SNAPSHOT = "snapshot"
601 _STAGE_SNAPSHOT = "snapshot"
602 # trying to build a delta based of the previous revision
602 # trying to build a delta based of the previous revision
603 _STAGE_PREV = "prev"
603 _STAGE_PREV = "prev"
604 # trying to build a full snapshot
604 # trying to build a full snapshot
605 _STAGE_FULL = "full"
605 _STAGE_FULL = "full"
606
606
607
607
608 class _BaseDeltaSearch(abc.ABC):
608 class _BaseDeltaSearch(abc.ABC):
609 """perform the search of a good delta for a single revlog revision
609 """perform the search of a good delta for a single revlog revision
610
610
611 note: some of the deltacomputer.finddeltainfo logic should probably move
611 note: some of the deltacomputer.finddeltainfo logic should probably move
612 here.
612 here.
613 """
613 """
614
614
615 def __init__(
615 def __init__(
616 self,
616 self,
617 revlog,
617 revlog,
618 revinfo,
618 revinfo,
619 p1,
619 p1,
620 p2,
620 p2,
621 cachedelta,
621 cachedelta,
622 excluded_bases=None,
622 excluded_bases=None,
623 target_rev=None,
623 target_rev=None,
624 snapshot_cache=None,
624 snapshot_cache=None,
625 ):
625 ):
626 # the DELTA_BASE_REUSE_FORCE case should have been taken care of sooner
626 # the DELTA_BASE_REUSE_FORCE case should have been taken care of sooner
627 # so we should never end up asking such question. Adding the assert as
627 # so we should never end up asking such question. Adding the assert as
628 # a safe-guard to detect anything that would be fishy in this regard.
628 # a safe-guard to detect anything that would be fishy in this regard.
629 assert (
629 assert (
630 cachedelta is None
630 cachedelta is None
631 or cachedelta[2] != DELTA_BASE_REUSE_FORCE
631 or cachedelta[2] != DELTA_BASE_REUSE_FORCE
632 or not revlog.delta_config.general_delta
632 or not revlog.delta_config.general_delta
633 )
633 )
634 self.revlog = revlog
634 self.revlog = revlog
635 self.revinfo = revinfo
635 self.revinfo = revinfo
636 self.textlen = revinfo.textlen
636 self.textlen = revinfo.textlen
637 self.p1 = p1
637 self.p1 = p1
638 self.p2 = p2
638 self.p2 = p2
639 self.cachedelta = cachedelta
639 self.cachedelta = cachedelta
640 self.excluded_bases = excluded_bases
640 self.excluded_bases = excluded_bases
641 if target_rev is None:
641 if target_rev is None:
642 self.target_rev = len(self.revlog)
642 self.target_rev = len(self.revlog)
643 self.target_rev = target_rev
643 self.target_rev = target_rev
644 if snapshot_cache is None:
644 if snapshot_cache is None:
645 # map: base-rev: [snapshot-revs]
645 # map: base-rev: [snapshot-revs]
646 snapshot_cache = SnapshotCache()
646 snapshot_cache = SnapshotCache()
647 self.snapshot_cache = snapshot_cache
647 self.snapshot_cache = snapshot_cache
648
648
649 self.tested = {nullrev}
649 self.tested = {nullrev}
650
650
651 self.current_stage = _STAGE_UNSPECIFIED
651 self.current_stage = _STAGE_UNSPECIFIED
652 self.current_group = None
652 self.current_group = None
653 self._init_group()
653 self._init_group()
654
654
655 def is_good_delta_info(self, deltainfo):
655 def is_good_delta_info(self, deltainfo):
656 """Returns True if the given delta is good.
656 """Returns True if the given delta is good.
657
657
658 Good means that it is within the disk span, disk size, and chain length
658 Good means that it is within the disk span, disk size, and chain length
659 bounds that we know to be performant.
659 bounds that we know to be performant.
660 """
660 """
661 if not self._is_good_delta_info_universal(deltainfo):
661 if not self._is_good_delta_info_universal(deltainfo):
662 return False
662 return False
663 if not self._is_good_delta_info_chain_quality(deltainfo):
663 if not self._is_good_delta_info_chain_quality(deltainfo):
664 return False
664 return False
665 return True
665 return True
666
666
667 def _is_good_delta_info_universal(self, deltainfo):
667 def _is_good_delta_info_universal(self, deltainfo):
668 """Returns True if the given delta is good.
668 """Returns True if the given delta is good.
669
669
670 This performs generic checks needed by all format variants.
670 This performs generic checks needed by all format variants.
671
671
672 This is used by is_good_delta_info.
672 This is used by is_good_delta_info.
673 """
673 """
674
674
675 if deltainfo is None:
675 if deltainfo is None:
676 return False
676 return False
677
677
678 # the DELTA_BASE_REUSE_FORCE case should have been taken care of sooner
678 # the DELTA_BASE_REUSE_FORCE case should have been taken care of sooner
679 # so we should never end up asking such question. Adding the assert as
679 # so we should never end up asking such question. Adding the assert as
680 # a safe-guard to detect anything that would be fishy in this regard.
680 # a safe-guard to detect anything that would be fishy in this regard.
681 assert (
681 assert (
682 self.revinfo.cachedelta is None
682 self.revinfo.cachedelta is None
683 or self.revinfo.cachedelta[2] != DELTA_BASE_REUSE_FORCE
683 or self.revinfo.cachedelta[2] != DELTA_BASE_REUSE_FORCE
684 or not self.revlog.delta_config.general_delta
684 or not self.revlog.delta_config.general_delta
685 )
685 )
686
686
687 # Bad delta from new delta size:
687 # Bad delta from new delta size:
688 #
688 #
689 # If the delta size is larger than the target text, storing the delta
689 # If the delta size is larger than the target text, storing the delta
690 # will be inefficient.
690 # will be inefficient.
691 if self.revinfo.textlen < deltainfo.deltalen:
691 if self.revinfo.textlen < deltainfo.deltalen:
692 return False
692 return False
693
693
694 return True
694 return True
695
695
696 def _is_good_delta_info_chain_quality(self, deltainfo):
696 def _is_good_delta_info_chain_quality(self, deltainfo):
697 """Returns True if the chain associated with the delta is good.
697 """Returns True if the chain associated with the delta is good.
698
698
699 This performs checks for format that use delta chains.
699 This performs checks for format that use delta chains.
700
700
701 This is used by is_good_delta_info.
701 This is used by is_good_delta_info.
702 """
702 """
703 # - 'deltainfo.distance' is the distance from the base revision --
703 # - 'deltainfo.distance' is the distance from the base revision --
704 # bounding it limits the amount of I/O we need to do.
704 # bounding it limits the amount of I/O we need to do.
705
705
706 defaultmax = self.revinfo.textlen * 4
706 defaultmax = self.revinfo.textlen * 4
707 maxdist = self.revlog.delta_config.max_deltachain_span
707 maxdist = self.revlog.delta_config.max_deltachain_span
708 if not maxdist:
708 if not maxdist:
709 maxdist = deltainfo.distance # ensure the conditional pass
709 maxdist = deltainfo.distance # ensure the conditional pass
710 maxdist = max(maxdist, defaultmax)
710 maxdist = max(maxdist, defaultmax)
711
711
712 # Bad delta from read span:
712 # Bad delta from read span:
713 #
713 #
714 # If the span of data read is larger than the maximum allowed.
714 # If the span of data read is larger than the maximum allowed.
715 #
715 #
716 # In the sparse-revlog case, we rely on the associated "sparse
716 # In the sparse-revlog case, we rely on the associated "sparse
717 # reading" to avoid issue related to the span of data. In theory, it
717 # reading" to avoid issue related to the span of data. In theory, it
718 # would be possible to build pathological revlog where delta pattern
718 # would be possible to build pathological revlog where delta pattern
719 # would lead to too many reads. However, they do not happen in
719 # would lead to too many reads. However, they do not happen in
720 # practice at all. So we skip the span check entirely.
720 # practice at all. So we skip the span check entirely.
721 if (
721 if (
722 not self.revlog.delta_config.sparse_revlog
722 not self.revlog.delta_config.sparse_revlog
723 and maxdist < deltainfo.distance
723 and maxdist < deltainfo.distance
724 ):
724 ):
725 return False
725 return False
726
726
727 # Bad delta from cumulated payload size:
727 # Bad delta from cumulated payload size:
728 #
728 #
729 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
729 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
730 # deltas we need to apply -- bounding it limits the amount of CPU
730 # deltas we need to apply -- bounding it limits the amount of CPU
731 # we consume.
731 # we consume.
732 max_chain_data = self.revinfo.textlen * LIMIT_DELTA2TEXT
732 max_chain_data = self.revinfo.textlen * LIMIT_DELTA2TEXT
733 # If the sum of delta get larger than K * target text length.
733 # If the sum of delta get larger than K * target text length.
734 if max_chain_data < deltainfo.compresseddeltalen:
734 if max_chain_data < deltainfo.compresseddeltalen:
735 return False
735 return False
736
736
737 # Bad delta from chain length:
737 # Bad delta from chain length:
738 #
738 #
739 # If the number of delta in the chain gets too high.
739 # If the number of delta in the chain gets too high.
740 if (
740 if (
741 self.revlog.delta_config.max_chain_len
741 self.revlog.delta_config.max_chain_len
742 and self.revlog.delta_config.max_chain_len < deltainfo.chainlen
742 and self.revlog.delta_config.max_chain_len < deltainfo.chainlen
743 ):
743 ):
744 return False
744 return False
745 return True
745 return True
746
746
747 @property
747 @property
748 def done(self):
748 def done(self):
749 """True when all possible candidate have been tested"""
749 """True when all possible candidate have been tested"""
750 return self.current_group is None
750 return self.current_group is None
751
751
752 @abc.abstractmethod
752 @abc.abstractmethod
753 def next_group(self, good_delta=None):
753 def next_group(self, good_delta=None):
754 """move to the next group to test
754 """move to the next group to test
755
755
756 The group of revision to test will be available in
756 The group of revision to test will be available in
757 `self.current_group`. If the previous group had any good delta, the
757 `self.current_group`. If the previous group had any good delta, the
758 best one can be passed as the `good_delta` parameter to help selecting
758 best one can be passed as the `good_delta` parameter to help selecting
759 the next group.
759 the next group.
760
760
761 If not revision remains to be, `self.done` will be True and
761 If not revision remains to be, `self.done` will be True and
762 `self.current_group` will be None.
762 `self.current_group` will be None.
763 """
763 """
764 pass
764 pass
765
765
766 @abc.abstractmethod
766 @abc.abstractmethod
767 def _init_group(self):
767 def _init_group(self):
768 pass
768 pass
769
769
770
770
771 class _NoDeltaSearch(_BaseDeltaSearch):
771 class _NoDeltaSearch(_BaseDeltaSearch):
772 """Search for no delta.
772 """Search for no delta.
773
773
774 This search variant is to be used in case where we should not store delta.
774 This search variant is to be used in case where we should not store delta.
775 """
775 """
776
776
777 def _init_group(self):
777 def _init_group(self):
778 self.current_stage = _STAGE_FULL
778 self.current_stage = _STAGE_FULL
779
779
780 def next_group(self, good_delta=None):
780 def next_group(self, good_delta=None):
781 pass
781 pass
782
782
783
783
784 class _PrevDeltaSearch(_BaseDeltaSearch):
784 class _PrevDeltaSearch(_BaseDeltaSearch):
785 """Search for delta against the previous revision only
785 """Search for delta against the previous revision only
786
786
787 This search variant is to be used when the format does not allow for delta
787 This search variant is to be used when the format does not allow for delta
788 against arbitrary bases.
788 against arbitrary bases.
789 """
789 """
790
790
791 def _init_group(self):
791 def _init_group(self):
792 self.current_stage = _STAGE_PREV
792 self.current_stage = _STAGE_PREV
793 self.current_group = [self.target_rev - 1]
793 self.current_group = [self.target_rev - 1]
794 self.tested.update(self.current_group)
794 self.tested.update(self.current_group)
795
795
796 def next_group(self, good_delta=None):
796 def next_group(self, good_delta=None):
797 self.current_stage = _STAGE_FULL
797 self.current_stage = _STAGE_FULL
798 self.current_group = None
798 self.current_group = None
799
799
800
800
801 class _GeneralDeltaSearch(_BaseDeltaSearch):
801 class _GeneralDeltaSearch(_BaseDeltaSearch):
802 """Delta search variant for general-delta repository"""
802 """Delta search variant for general-delta repository"""
803
803
804 def _init_group(self):
804 def _init_group(self):
805 # Why search for delta base if we cannot use a delta base ?
805 # Why search for delta base if we cannot use a delta base ?
806 # also see issue6056
806 # also see issue6056
807 assert self.revlog.delta_config.general_delta
807 assert self.revlog.delta_config.general_delta
808 self._candidates_iterator = self._iter_groups()
808 self._candidates_iterator = self._iter_groups()
809 self._last_good = None
809 self._last_good = None
810 if (
810 if (
811 self.cachedelta is not None
811 self.cachedelta is not None
812 and self.cachedelta[2] > DELTA_BASE_REUSE_NO
812 and self.cachedelta[2] > DELTA_BASE_REUSE_NO
813 and self._pre_filter_rev(self.cachedelta[0])
813 and self._pre_filter_rev(self.cachedelta[0])
814 ):
814 ):
815 # First we try to reuse a the delta contained in the bundle. (or from
815 # First we try to reuse a the delta contained in the bundle. (or from
816 # the source revlog)
816 # the source revlog)
817 #
817 #
818 # This logic only applies to general delta repositories and can be
818 # This logic only applies to general delta repositories and can be
819 # disabled through configuration. Disabling reuse source delta is
819 # disabled through configuration. Disabling reuse source delta is
820 # useful when we want to make sure we recomputed "optimal" deltas.
820 # useful when we want to make sure we recomputed "optimal" deltas.
821 self.current_stage = _STAGE_CACHED
821 self.current_stage = _STAGE_CACHED
822 self._internal_group = (self.cachedelta[0],)
822 self._internal_group = (self.cachedelta[0],)
823 self._internal_idx = 0
823 self._internal_idx = 0
824 self.current_group = self._internal_group
824 self.current_group = self._internal_group
825 self.tested.update(self.current_group)
825 self.tested.update(self.current_group)
826 else:
826 else:
827 self._next_internal_group()
827 self._next_internal_group()
828
828
829 def _next_internal_group(self):
829 def _next_internal_group(self):
830 # self._internal_group can be larger than self.current_group
830 # self._internal_group can be larger than self.current_group
831 self._internal_idx = 0
831 self._internal_idx = 0
832 group = self._candidates_iterator.send(self._last_good)
832 group = self._candidates_iterator.send(self._last_good)
833 if group is not None:
833 if group is not None:
834 group = self._pre_filter_candidate_revs(group)
834 group = self._pre_filter_candidate_revs(group)
835 self._internal_group = group
835 self._internal_group = group
836 if self._internal_group is None:
836 if self._internal_group is None:
837 self.current_group = None
837 self.current_group = None
838 elif len(self._internal_group) == 0:
838 elif len(self._internal_group) == 0:
839 self.next_group()
839 self.next_group()
840 else:
840 else:
841 chunk_size = self.revlog.delta_config.candidate_group_chunk_size
841 chunk_size = self.revlog.delta_config.candidate_group_chunk_size
842 if chunk_size > 0:
842 if chunk_size > 0:
843 self.current_group = self._internal_group[:chunk_size]
843 self.current_group = self._internal_group[:chunk_size]
844 self._internal_idx += chunk_size
844 self._internal_idx += chunk_size
845 else:
845 else:
846 self.current_group = self._internal_group
846 self.current_group = self._internal_group
847 self._internal_idx += len(self.current_group)
847 self._internal_idx += len(self.current_group)
848
848
849 self.tested.update(self.current_group)
849 self.tested.update(self.current_group)
850
850
851 def next_group(self, good_delta=None):
851 def next_group(self, good_delta=None):
852 old_good = self._last_good
852 old_good = self._last_good
853 if good_delta is not None:
853 if good_delta is not None:
854 self._last_good = good_delta.base
854 self._last_good = good_delta
855 if self.current_stage == _STAGE_CACHED and good_delta is not None:
855 if self.current_stage == _STAGE_CACHED and good_delta is not None:
856 # the cache is good, let us use the cache as requested
856 # the cache is good, let us use the cache as requested
857 self._candidates_iterator = None
857 self._candidates_iterator = None
858 self._internal_group = None
858 self._internal_group = None
859 self._internal_idx = None
859 self._internal_idx = None
860 self.current_group = None
860 self.current_group = None
861 return
861 return
862
862
863 if (self._internal_idx < len(self._internal_group)) and (
863 if (self._internal_idx < len(self._internal_group)) and (
864 old_good != good_delta
864 old_good != good_delta
865 ):
865 ):
866 # When the size of the candidate group is big, it can result in
866 # When the size of the candidate group is big, it can result in
867 # a quite significant performance impact. To reduce this, we
867 # a quite significant performance impact. To reduce this, we
868 # can send them in smaller batches until the new batch does not
868 # can send them in smaller batches until the new batch does not
869 # provide any improvements.
869 # provide any improvements.
870 #
870 #
871 # This might reduce the overall efficiency of the compression
871 # This might reduce the overall efficiency of the compression
872 # in some corner cases, but that should also prevent very
872 # in some corner cases, but that should also prevent very
873 # pathological cases from being an issue. (eg. 20 000
873 # pathological cases from being an issue. (eg. 20 000
874 # candidates).
874 # candidates).
875 #
875 #
876 # XXX note that the ordering of the group becomes important as
876 # XXX note that the ordering of the group becomes important as
877 # it now impacts the final result. The current order is
877 # it now impacts the final result. The current order is
878 # unprocessed and can be improved.
878 # unprocessed and can be improved.
879 next_idx = self._internal_idx + self._group_chunk_size
879 next_idx = self._internal_idx + self._group_chunk_size
880 self.current_group = self._internal_group[
880 self.current_group = self._internal_group[
881 self._internal_idx : next_idx
881 self._internal_idx : next_idx
882 ]
882 ]
883 self.tested.update(self.current_group)
883 self.tested.update(self.current_group)
884 self._internal_idx = next_idx
884 self._internal_idx = next_idx
885 else:
885 else:
886 self._next_internal_group()
886 self._next_internal_group()
887
887
888 def _pre_filter_candidate_revs(self, temptative):
888 def _pre_filter_candidate_revs(self, temptative):
889 """filter possible candidate before computing a delta
889 """filter possible candidate before computing a delta
890
890
891 This function use various criteria to pre-filter candidate delta base
891 This function use various criteria to pre-filter candidate delta base
892 before we compute a delta and evaluate its quality.
892 before we compute a delta and evaluate its quality.
893
893
894 Such pre-filter limit the number of computed delta, an expensive operation.
894 Such pre-filter limit the number of computed delta, an expensive operation.
895
895
896 return the updated list of revision to test
896 return the updated list of revision to test
897 """
897 """
898 deltalength = self.revlog.length
898 deltalength = self.revlog.length
899 deltaparent = self.revlog.deltaparent
899 deltaparent = self.revlog.deltaparent
900
900
901 tested = self.tested
901 tested = self.tested
902 group = []
902 group = []
903 for rev in temptative:
903 for rev in temptative:
904 # skip over empty delta (no need to include them in a chain)
904 # skip over empty delta (no need to include them in a chain)
905 while not (rev == nullrev or rev in tested or deltalength(rev)):
905 while not (rev == nullrev or rev in tested or deltalength(rev)):
906 tested.add(rev)
906 tested.add(rev)
907 rev = deltaparent(rev)
907 rev = deltaparent(rev)
908 if self._pre_filter_rev(rev):
908 if self._pre_filter_rev(rev):
909 group.append(rev)
909 group.append(rev)
910 else:
910 else:
911 self.tested.add(rev)
911 self.tested.add(rev)
912 return group
912 return group
913
913
914 def _pre_filter_rev_universal(self, rev):
914 def _pre_filter_rev_universal(self, rev):
915 """pre filtering that is need in all cases.
915 """pre filtering that is need in all cases.
916
916
917 return True if it seems okay to test a rev, False otherwise.
917 return True if it seems okay to test a rev, False otherwise.
918
918
919 used by _pre_filter_rev.
919 used by _pre_filter_rev.
920 """
920 """
921 # no need to try a delta against nullrev, this will be done as
921 # no need to try a delta against nullrev, this will be done as
922 # a last resort.
922 # a last resort.
923 if rev == nullrev:
923 if rev == nullrev:
924 return False
924 return False
925 # filter out revision we tested already
925 # filter out revision we tested already
926 if rev in self.tested:
926 if rev in self.tested:
927 return False
927 return False
928
928
929 # an higher authority deamed the base unworthy (e.g. censored)
929 # an higher authority deamed the base unworthy (e.g. censored)
930 if self.excluded_bases is not None and rev in self.excluded_bases:
930 if self.excluded_bases is not None and rev in self.excluded_bases:
931 return False
931 return False
932 # We are in some recomputation cases and that rev is too high
932 # We are in some recomputation cases and that rev is too high
933 # in the revlog
933 # in the revlog
934 if self.target_rev is not None and rev >= self.target_rev:
934 if self.target_rev is not None and rev >= self.target_rev:
935 return False
935 return False
936 # no delta for rawtext-changing revs (see "candelta" for why)
936 # no delta for rawtext-changing revs (see "candelta" for why)
937 if self.revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
937 if self.revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
938 return False
938 return False
939 return True
939 return True
940
940
941 def _pre_filter_rev_delta_chain(self, rev):
941 def _pre_filter_rev_delta_chain(self, rev):
942 """pre filtering that is needed in sparse revlog cases
942 """pre filtering that is needed in sparse revlog cases
943
943
944 return True if it seems okay to test a rev, False otherwise.
944 return True if it seems okay to test a rev, False otherwise.
945
945
946 used by _pre_filter_rev.
946 used by _pre_filter_rev.
947 """
947 """
948 deltas_limit = self.revinfo.textlen * LIMIT_DELTA2TEXT
948 deltas_limit = self.revinfo.textlen * LIMIT_DELTA2TEXT
949 # filter out delta base that will never produce good delta
949 # filter out delta base that will never produce good delta
950 #
950 #
951 # if the delta of that base is already bigger than the limit
951 # if the delta of that base is already bigger than the limit
952 # for the delta chain size, doing a delta is hopeless.
952 # for the delta chain size, doing a delta is hopeless.
953 if deltas_limit < self.revlog.length(rev):
953 if deltas_limit < self.revlog.length(rev):
954 return False
954 return False
955
955
956 # If we reach here, we are about to build and test a delta.
956 # If we reach here, we are about to build and test a delta.
957 # The delta building process will compute the chaininfo in all
957 # The delta building process will compute the chaininfo in all
958 # case, since that computation is cached, it is fine to access
958 # case, since that computation is cached, it is fine to access
959 # it here too.
959 # it here too.
960 chainlen, chainsize = self.revlog._chaininfo(rev)
960 chainlen, chainsize = self.revlog._chaininfo(rev)
961 # if chain will be too long, skip base
961 # if chain will be too long, skip base
962 if (
962 if (
963 self.revlog.delta_config.max_chain_len
963 self.revlog.delta_config.max_chain_len
964 and chainlen >= self.revlog.delta_config.max_chain_len
964 and chainlen >= self.revlog.delta_config.max_chain_len
965 ):
965 ):
966 return False
966 return False
967 # if chain already have too much data, skip base
967 # if chain already have too much data, skip base
968 if deltas_limit < chainsize:
968 if deltas_limit < chainsize:
969 return False
969 return False
970 return True
970 return True
971
971
972 def _pre_filter_rev(self, rev):
972 def _pre_filter_rev(self, rev):
973 """return True if it seems okay to test a rev, False otherwise"""
973 """return True if it seems okay to test a rev, False otherwise"""
974 if not self._pre_filter_rev_universal(rev):
974 if not self._pre_filter_rev_universal(rev):
975 return False
975 return False
976 if not self._pre_filter_rev_delta_chain(rev):
976 if not self._pre_filter_rev_delta_chain(rev):
977 return False
977 return False
978 return True
978 return True
979
979
980 def _iter_parents(self):
980 def _iter_parents(self):
981 # exclude already lazy tested base if any
981 # exclude already lazy tested base if any
982 parents = [p for p in (self.p1, self.p2) if p != nullrev]
982 parents = [p for p in (self.p1, self.p2) if p != nullrev]
983
983
984 self.current_stage = _STAGE_PARENTS
984 self.current_stage = _STAGE_PARENTS
985 if (
985 if (
986 not self.revlog.delta_config.delta_both_parents
986 not self.revlog.delta_config.delta_both_parents
987 and len(parents) == 2
987 and len(parents) == 2
988 ):
988 ):
989 parents.sort()
989 parents.sort()
990 # To minimize the chance of having to build a fulltext,
990 # To minimize the chance of having to build a fulltext,
991 # pick first whichever parent is closest to us (max rev)
991 # pick first whichever parent is closest to us (max rev)
992 yield (parents[1],)
992 yield (parents[1],)
993 # then the other one (min rev) if the first did not fit
993 # then the other one (min rev) if the first did not fit
994 yield (parents[0],)
994 yield (parents[0],)
995 elif len(parents) > 0:
995 elif len(parents) > 0:
996 # Test all parents (1 or 2), and keep the best candidate
996 # Test all parents (1 or 2), and keep the best candidate
997 yield parents
997 yield parents
998
998
999 def _iter_prev(self):
999 def _iter_prev(self):
1000 # other approach failed try against prev to hopefully save us a
1000 # other approach failed try against prev to hopefully save us a
1001 # fulltext.
1001 # fulltext.
1002 self.current_stage = _STAGE_PREV
1002 self.current_stage = _STAGE_PREV
1003 yield (self.target_rev - 1,)
1003 yield (self.target_rev - 1,)
1004
1004
1005 def _iter_groups(self):
1005 def _iter_groups(self):
1006 good = None
1006 good = None
1007 for group in self._iter_parents():
1007 for group in self._iter_parents():
1008 good = yield group
1008 good = yield group
1009 if good is not None:
1009 if good is not None:
1010 break
1010 break
1011 else:
1011 else:
1012 assert good is None
1012 assert good is None
1013 yield from self._iter_prev()
1013 yield from self._iter_prev()
1014 yield None
1014 yield None
1015
1015
1016
1016
1017 class _SparseDeltaSearch(_GeneralDeltaSearch):
1017 class _SparseDeltaSearch(_GeneralDeltaSearch):
1018 """Delta search variants for sparse-revlog"""
1018 """Delta search variants for sparse-revlog"""
1019
1019
1020 def is_good_delta_info(self, deltainfo):
1020 def is_good_delta_info(self, deltainfo):
1021 """Returns True if the given delta is good.
1021 """Returns True if the given delta is good.
1022
1022
1023 Good means that it is within the disk span, disk size, and chain length
1023 Good means that it is within the disk span, disk size, and chain length
1024 bounds that we know to be performant.
1024 bounds that we know to be performant.
1025 """
1025 """
1026 if not self._is_good_delta_info_universal(deltainfo):
1026 if not self._is_good_delta_info_universal(deltainfo):
1027 return False
1027 return False
1028 if not self._is_good_delta_info_chain_quality(deltainfo):
1028 if not self._is_good_delta_info_chain_quality(deltainfo):
1029 return False
1029 return False
1030 if not self._is_good_delta_info_snapshot_constraints(deltainfo):
1030 if not self._is_good_delta_info_snapshot_constraints(deltainfo):
1031 return False
1031 return False
1032 return True
1032 return True
1033
1033
1034 def _is_good_delta_info_snapshot_constraints(self, deltainfo):
1034 def _is_good_delta_info_snapshot_constraints(self, deltainfo):
1035 """Returns True if the chain associated with snapshots
1035 """Returns True if the chain associated with snapshots
1036
1036
1037 This performs checks for format that use sparse-revlog and intermediate
1037 This performs checks for format that use sparse-revlog and intermediate
1038 snapshots.
1038 snapshots.
1039
1039
1040 This is used by is_good_delta_info.
1040 This is used by is_good_delta_info.
1041 """
1041 """
1042 # if not a snapshot, this method has no filtering to do
1042 # if not a snapshot, this method has no filtering to do
1043 if deltainfo.snapshotdepth is None:
1043 if deltainfo.snapshotdepth is None:
1044 return True
1044 return True
1045 # bad delta from intermediate snapshot size limit
1045 # bad delta from intermediate snapshot size limit
1046 #
1046 #
1047 # If an intermediate snapshot size is higher than the limit. The
1047 # If an intermediate snapshot size is higher than the limit. The
1048 # limit exist to prevent endless chain of intermediate delta to be
1048 # limit exist to prevent endless chain of intermediate delta to be
1049 # created.
1049 # created.
1050 if (
1050 if (
1051 self.revinfo.textlen >> deltainfo.snapshotdepth
1051 self.revinfo.textlen >> deltainfo.snapshotdepth
1052 ) < deltainfo.deltalen:
1052 ) < deltainfo.deltalen:
1053 return False
1053 return False
1054
1054
1055 # bad delta if new intermediate snapshot is larger than the previous
1055 # bad delta if new intermediate snapshot is larger than the previous
1056 # snapshot
1056 # snapshot
1057 if self.revlog.length(deltainfo.base) < deltainfo.deltalen:
1057 if self.revlog.length(deltainfo.base) < deltainfo.deltalen:
1058 return False
1058 return False
1059
1059
1060 return True
1060 return True
1061
1061
1062 def _pre_filter_rev(self, rev):
1062 def _pre_filter_rev(self, rev):
1063 """return True if it seems okay to test a rev, False otherwise"""
1063 """return True if it seems okay to test a rev, False otherwise"""
1064 if not self._pre_filter_rev_universal(rev):
1064 if not self._pre_filter_rev_universal(rev):
1065 return False
1065 return False
1066 if not self._pre_filter_rev_delta_chain(rev):
1066 if not self._pre_filter_rev_delta_chain(rev):
1067 return False
1067 return False
1068 if not self._pre_filter_rev_sparse(rev):
1068 if not self._pre_filter_rev_sparse(rev):
1069 return False
1069 return False
1070 return True
1070 return True
1071
1071
1072 def _pre_filter_rev_sparse(self, rev):
1072 def _pre_filter_rev_sparse(self, rev):
1073 """pre filtering that is needed in sparse revlog cases
1073 """pre filtering that is needed in sparse revlog cases
1074
1074
1075 return True if it seems okay to test a rev, False otherwise.
1075 return True if it seems okay to test a rev, False otherwise.
1076
1076
1077 used by _pre_filter_rev.
1077 used by _pre_filter_rev.
1078 """
1078 """
1079 assert self.revlog.delta_config.sparse_revlog
1079 assert self.revlog.delta_config.sparse_revlog
1080 # if the revision we test again is too small, the resulting delta
1080 # if the revision we test again is too small, the resulting delta
1081 # will be large anyway as that amount of data to be added is big
1081 # will be large anyway as that amount of data to be added is big
1082 if self.revlog.rawsize(rev) < (self.textlen // LIMIT_BASE2TEXT):
1082 if self.revlog.rawsize(rev) < (self.textlen // LIMIT_BASE2TEXT):
1083 return False
1083 return False
1084
1084
1085 if self.revlog.delta_config.upper_bound_comp is not None:
1085 if self.revlog.delta_config.upper_bound_comp is not None:
1086 maxcomp = self.revlog.delta_config.upper_bound_comp
1086 maxcomp = self.revlog.delta_config.upper_bound_comp
1087 basenotsnap = (self.p1, self.p2, nullrev)
1087 basenotsnap = (self.p1, self.p2, nullrev)
1088 if rev not in basenotsnap and self.revlog.issnapshot(rev):
1088 if rev not in basenotsnap and self.revlog.issnapshot(rev):
1089 snapshotdepth = self.revlog.snapshotdepth(rev)
1089 snapshotdepth = self.revlog.snapshotdepth(rev)
1090 # If text is significantly larger than the base, we can
1090 # If text is significantly larger than the base, we can
1091 # expect the resulting delta to be proportional to the size
1091 # expect the resulting delta to be proportional to the size
1092 # difference
1092 # difference
1093 revsize = self.revlog.rawsize(rev)
1093 revsize = self.revlog.rawsize(rev)
1094 rawsizedistance = max(self.textlen - revsize, 0)
1094 rawsizedistance = max(self.textlen - revsize, 0)
1095 # use an estimate of the compression upper bound.
1095 # use an estimate of the compression upper bound.
1096 lowestrealisticdeltalen = rawsizedistance // maxcomp
1096 lowestrealisticdeltalen = rawsizedistance // maxcomp
1097
1097
1098 # check the absolute constraint on the delta size
1098 # check the absolute constraint on the delta size
1099 snapshotlimit = self.textlen >> snapshotdepth
1099 snapshotlimit = self.textlen >> snapshotdepth
1100 if snapshotlimit < lowestrealisticdeltalen:
1100 if snapshotlimit < lowestrealisticdeltalen:
1101 # delta lower bound is larger than accepted upper
1101 # delta lower bound is larger than accepted upper
1102 # bound
1102 # bound
1103 return False
1103 return False
1104
1104
1105 # check the relative constraint on the delta size
1105 # check the relative constraint on the delta size
1106 revlength = self.revlog.length(rev)
1106 revlength = self.revlog.length(rev)
1107 if revlength < lowestrealisticdeltalen:
1107 if revlength < lowestrealisticdeltalen:
1108 # delta probable lower bound is larger than target
1108 # delta probable lower bound is larger than target
1109 # base
1109 # base
1110 return False
1110 return False
1111 return True
1111 return True
1112
1112
1113 def _iter_snapshots_base(self):
1113 def _iter_snapshots_base(self):
1114 assert self.revlog.delta_config.sparse_revlog
1114 assert self.revlog.delta_config.sparse_revlog
1115 assert self.current_stage == _STAGE_SNAPSHOT
1115 assert self.current_stage == _STAGE_SNAPSHOT
1116 prev = self.target_rev - 1
1116 prev = self.target_rev - 1
1117 deltachain = lambda rev: self.revlog._deltachain(rev)[0]
1117 deltachain = lambda rev: self.revlog._deltachain(rev)[0]
1118
1118
1119 parents = [p for p in (self.p1, self.p2) if p != nullrev]
1119 parents = [p for p in (self.p1, self.p2) if p != nullrev]
1120 if not parents:
1120 if not parents:
1121 return
1121 return
1122 # See if we can use an existing snapshot in the parent chains to
1122 # See if we can use an existing snapshot in the parent chains to
1123 # use as a base for a new intermediate-snapshot
1123 # use as a base for a new intermediate-snapshot
1124 #
1124 #
1125 # search for snapshot in parents delta chain map: snapshot-level:
1125 # search for snapshot in parents delta chain map: snapshot-level:
1126 # snapshot-rev
1126 # snapshot-rev
1127 parents_snaps = collections.defaultdict(set)
1127 parents_snaps = collections.defaultdict(set)
1128 candidate_chains = [deltachain(p) for p in parents]
1128 candidate_chains = [deltachain(p) for p in parents]
1129 for chain in candidate_chains:
1129 for chain in candidate_chains:
1130 for idx, s in enumerate(chain):
1130 for idx, s in enumerate(chain):
1131 if not self.revlog.issnapshot(s):
1131 if not self.revlog.issnapshot(s):
1132 break
1132 break
1133 parents_snaps[idx].add(s)
1133 parents_snaps[idx].add(s)
1134 snapfloor = min(parents_snaps[0]) + 1
1134 snapfloor = min(parents_snaps[0]) + 1
1135 self.snapshot_cache.update(self.revlog, snapfloor)
1135 self.snapshot_cache.update(self.revlog, snapfloor)
1136 # search for the highest "unrelated" revision
1136 # search for the highest "unrelated" revision
1137 #
1137 #
1138 # Adding snapshots used by "unrelated" revision increase the odd we
1138 # Adding snapshots used by "unrelated" revision increase the odd we
1139 # reuse an independant, yet better snapshot chain.
1139 # reuse an independant, yet better snapshot chain.
1140 #
1140 #
1141 # XXX instead of building a set of revisions, we could lazily
1141 # XXX instead of building a set of revisions, we could lazily
1142 # enumerate over the chains. That would be more efficient, however
1142 # enumerate over the chains. That would be more efficient, however
1143 # we stick to simple code for now.
1143 # we stick to simple code for now.
1144 all_revs = set()
1144 all_revs = set()
1145 for chain in candidate_chains:
1145 for chain in candidate_chains:
1146 all_revs.update(chain)
1146 all_revs.update(chain)
1147 other = None
1147 other = None
1148 for r in self.revlog.revs(prev, snapfloor):
1148 for r in self.revlog.revs(prev, snapfloor):
1149 if r not in all_revs:
1149 if r not in all_revs:
1150 other = r
1150 other = r
1151 break
1151 break
1152 if other is not None:
1152 if other is not None:
1153 # To avoid unfair competition, we won't use unrelated
1153 # To avoid unfair competition, we won't use unrelated
1154 # intermediate snapshot that are deeper than the ones from the
1154 # intermediate snapshot that are deeper than the ones from the
1155 # parent delta chain.
1155 # parent delta chain.
1156 max_depth = max(parents_snaps.keys())
1156 max_depth = max(parents_snaps.keys())
1157 chain = deltachain(other)
1157 chain = deltachain(other)
1158 for depth, s in enumerate(chain):
1158 for depth, s in enumerate(chain):
1159 if s < snapfloor:
1159 if s < snapfloor:
1160 continue
1160 continue
1161 if max_depth < depth:
1161 if max_depth < depth:
1162 break
1162 break
1163 if not self.revlog.issnapshot(s):
1163 if not self.revlog.issnapshot(s):
1164 break
1164 break
1165 parents_snaps[depth].add(s)
1165 parents_snaps[depth].add(s)
1166 # Test them as possible intermediate snapshot base We test them
1166 # Test them as possible intermediate snapshot base We test them
1167 # from highest to lowest level. High level one are more likely to
1167 # from highest to lowest level. High level one are more likely to
1168 # result in small delta
1168 # result in small delta
1169 floor = None
1169 floor = None
1170 for idx, snaps in sorted(parents_snaps.items(), reverse=True):
1170 for idx, snaps in sorted(parents_snaps.items(), reverse=True):
1171 siblings = set()
1171 siblings = set()
1172 for s in snaps:
1172 for s in snaps:
1173 siblings.update(self.snapshot_cache.snapshots[s])
1173 siblings.update(self.snapshot_cache.snapshots[s])
1174 # Before considering making a new intermediate snapshot, we
1174 # Before considering making a new intermediate snapshot, we
1175 # check if an existing snapshot, children of base we consider,
1175 # check if an existing snapshot, children of base we consider,
1176 # would be suitable.
1176 # would be suitable.
1177 #
1177 #
1178 # It give a change to reuse a delta chain "unrelated" to the
1178 # It give a change to reuse a delta chain "unrelated" to the
1179 # current revision instead of starting our own. Without such
1179 # current revision instead of starting our own. Without such
1180 # re-use, topological branches would keep reopening new chains.
1180 # re-use, topological branches would keep reopening new chains.
1181 # Creating more and more snapshot as the repository grow.
1181 # Creating more and more snapshot as the repository grow.
1182
1182
1183 if floor is not None:
1183 if floor is not None:
1184 # We only do this for siblings created after the one in our
1184 # We only do this for siblings created after the one in our
1185 # parent's delta chain. Those created before has less
1185 # parent's delta chain. Those created before has less
1186 # chances to be valid base since our ancestors had to
1186 # chances to be valid base since our ancestors had to
1187 # create a new snapshot.
1187 # create a new snapshot.
1188 siblings = [r for r in siblings if floor < r]
1188 siblings = [r for r in siblings if floor < r]
1189 yield tuple(sorted(siblings))
1189 yield tuple(sorted(siblings))
1190 # then test the base from our parent's delta chain.
1190 # then test the base from our parent's delta chain.
1191 yield tuple(sorted(snaps))
1191 yield tuple(sorted(snaps))
1192 floor = min(snaps)
1192 floor = min(snaps)
1193 # No suitable base found in the parent chain, search if any full
1193 # No suitable base found in the parent chain, search if any full
1194 # snapshots emitted since parent's base would be a suitable base
1194 # snapshots emitted since parent's base would be a suitable base
1195 # for an intermediate snapshot.
1195 # for an intermediate snapshot.
1196 #
1196 #
1197 # It give a chance to reuse a delta chain unrelated to the current
1197 # It give a chance to reuse a delta chain unrelated to the current
1198 # revisions instead of starting our own. Without such re-use,
1198 # revisions instead of starting our own. Without such re-use,
1199 # topological branches would keep reopening new full chains.
1199 # topological branches would keep reopening new full chains.
1200 # Creating more and more snapshot as the repository grow.
1200 # Creating more and more snapshot as the repository grow.
1201 full = [
1201 full = [
1202 r for r in self.snapshot_cache.snapshots[nullrev] if snapfloor <= r
1202 r for r in self.snapshot_cache.snapshots[nullrev] if snapfloor <= r
1203 ]
1203 ]
1204 yield tuple(sorted(full))
1204 yield tuple(sorted(full))
1205
1205
1206 def _iter_snapshots(self):
1206 def _iter_snapshots(self):
1207 assert self.revlog.delta_config.sparse_revlog
1207 assert self.revlog.delta_config.sparse_revlog
1208 self.current_stage = _STAGE_SNAPSHOT
1208 self.current_stage = _STAGE_SNAPSHOT
1209 good = None
1209 good = None
1210 groups = self._iter_snapshots_base()
1210 groups = self._iter_snapshots_base()
1211 for candidates in groups:
1211 for candidates in groups:
1212 good = yield candidates
1212 good = yield candidates
1213 if good is not None:
1213 if good is not None:
1214 break
1214 break
1215 # if we have a refinable value, try to refine it
1215 # if we have a refinable value, try to refine it
1216 if (
1216 if good is not None and good.snapshotdepth is not None:
1217 good is not None
1218 and good not in (self.p1, self.p2)
1219 and self.revlog.issnapshot(good)
1220 ):
1221 assert self.current_stage == _STAGE_SNAPSHOT
1217 assert self.current_stage == _STAGE_SNAPSHOT
1222 # refine snapshot down
1218 # refine snapshot down
1223 previous = None
1219 previous = None
1224 while previous != good:
1220 while previous != good:
1225 previous = good
1221 previous = good
1226 base = self.revlog.deltaparent(good)
1222 base = self.revlog.deltaparent(good.base)
1227 if base == nullrev:
1223 if base == nullrev:
1228 break
1224 break
1229 good = yield (base,)
1225 good = yield (base,)
1230 # refine snapshot up
1226 # refine snapshot up
1231 if not self.snapshot_cache.snapshots:
1227 if not self.snapshot_cache.snapshots:
1232 self.snapshot_cache.update(self.revlog, good + 1)
1228 self.snapshot_cache.update(self.revlog, good.base + 1)
1233 previous = None
1229 previous = None
1234 while good != previous:
1230 while good != previous:
1235 previous = good
1231 previous = good
1236 children = tuple(
1232 children = tuple(
1237 sorted(c for c in self.snapshot_cache.snapshots[good])
1233 sorted(c for c in self.snapshot_cache.snapshots[good.base])
1238 )
1234 )
1239 good = yield children
1235 good = yield children
1240 yield None
1236 yield None
1241
1237
1242 def _iter_groups(self):
1238 def _iter_groups(self):
1243 good = None
1239 good = None
1244 for group in self._iter_parents():
1240 for group in self._iter_parents():
1245 good = yield group
1241 good = yield group
1246 if good is not None:
1242 if good is not None:
1247 break
1243 break
1248 else:
1244 else:
1249 assert good is None
1245 assert good is None
1250 assert self.revlog.delta_config.sparse_revlog
1246 assert self.revlog.delta_config.sparse_revlog
1251 # If sparse revlog is enabled, we can try to refine the
1247 # If sparse revlog is enabled, we can try to refine the
1252 # available deltas
1248 # available deltas
1253 iter_snap = self._iter_snapshots()
1249 iter_snap = self._iter_snapshots()
1254 group = iter_snap.send(None)
1250 group = iter_snap.send(None)
1255 while group is not None:
1251 while group is not None:
1256 good = yield group
1252 good = yield group
1257 group = iter_snap.send(good)
1253 group = iter_snap.send(good)
1258 yield None
1254 yield None
1259
1255
1260
1256
1261 class SnapshotCache:
1257 class SnapshotCache:
1262 __slots__ = ('snapshots', '_start_rev', '_end_rev')
1258 __slots__ = ('snapshots', '_start_rev', '_end_rev')
1263
1259
1264 def __init__(self):
1260 def __init__(self):
1265 self.snapshots = collections.defaultdict(set)
1261 self.snapshots = collections.defaultdict(set)
1266 self._start_rev = None
1262 self._start_rev = None
1267 self._end_rev = None
1263 self._end_rev = None
1268
1264
1269 def update(self, revlog, start_rev=0):
1265 def update(self, revlog, start_rev=0):
1270 """find snapshots from start_rev to tip"""
1266 """find snapshots from start_rev to tip"""
1271 nb_revs = len(revlog)
1267 nb_revs = len(revlog)
1272 end_rev = nb_revs - 1
1268 end_rev = nb_revs - 1
1273 if start_rev > end_rev:
1269 if start_rev > end_rev:
1274 return # range is empty
1270 return # range is empty
1275
1271
1276 if self._start_rev is None:
1272 if self._start_rev is None:
1277 assert self._end_rev is None
1273 assert self._end_rev is None
1278 self._update(revlog, start_rev, end_rev)
1274 self._update(revlog, start_rev, end_rev)
1279 elif not (self._start_rev <= start_rev and end_rev <= self._end_rev):
1275 elif not (self._start_rev <= start_rev and end_rev <= self._end_rev):
1280 if start_rev < self._start_rev:
1276 if start_rev < self._start_rev:
1281 self._update(revlog, start_rev, self._start_rev - 1)
1277 self._update(revlog, start_rev, self._start_rev - 1)
1282 if self._end_rev < end_rev:
1278 if self._end_rev < end_rev:
1283 self._update(revlog, self._end_rev + 1, end_rev)
1279 self._update(revlog, self._end_rev + 1, end_rev)
1284
1280
1285 if self._start_rev is None:
1281 if self._start_rev is None:
1286 assert self._end_rev is None
1282 assert self._end_rev is None
1287 self._end_rev = end_rev
1283 self._end_rev = end_rev
1288 self._start_rev = start_rev
1284 self._start_rev = start_rev
1289 else:
1285 else:
1290 self._start_rev = min(self._start_rev, start_rev)
1286 self._start_rev = min(self._start_rev, start_rev)
1291 self._end_rev = max(self._end_rev, end_rev)
1287 self._end_rev = max(self._end_rev, end_rev)
1292 assert self._start_rev <= self._end_rev, (
1288 assert self._start_rev <= self._end_rev, (
1293 self._start_rev,
1289 self._start_rev,
1294 self._end_rev,
1290 self._end_rev,
1295 )
1291 )
1296
1292
1297 def _update(self, revlog, start_rev, end_rev):
1293 def _update(self, revlog, start_rev, end_rev):
1298 """internal method that actually do update content"""
1294 """internal method that actually do update content"""
1299 assert self._start_rev is None or (
1295 assert self._start_rev is None or (
1300 start_rev < self._start_rev or start_rev > self._end_rev
1296 start_rev < self._start_rev or start_rev > self._end_rev
1301 ), (self._start_rev, self._end_rev, start_rev, end_rev)
1297 ), (self._start_rev, self._end_rev, start_rev, end_rev)
1302 assert self._start_rev is None or (
1298 assert self._start_rev is None or (
1303 end_rev < self._start_rev or end_rev > self._end_rev
1299 end_rev < self._start_rev or end_rev > self._end_rev
1304 ), (self._start_rev, self._end_rev, start_rev, end_rev)
1300 ), (self._start_rev, self._end_rev, start_rev, end_rev)
1305 cache = self.snapshots
1301 cache = self.snapshots
1306 if hasattr(revlog.index, 'findsnapshots'):
1302 if hasattr(revlog.index, 'findsnapshots'):
1307 revlog.index.findsnapshots(cache, start_rev, end_rev)
1303 revlog.index.findsnapshots(cache, start_rev, end_rev)
1308 else:
1304 else:
1309 deltaparent = revlog.deltaparent
1305 deltaparent = revlog.deltaparent
1310 issnapshot = revlog.issnapshot
1306 issnapshot = revlog.issnapshot
1311 for rev in revlog.revs(start_rev, end_rev):
1307 for rev in revlog.revs(start_rev, end_rev):
1312 if issnapshot(rev):
1308 if issnapshot(rev):
1313 cache[deltaparent(rev)].add(rev)
1309 cache[deltaparent(rev)].add(rev)
1314
1310
1315
1311
1316 class deltacomputer:
1312 class deltacomputer:
1317 """object capable of computing delta and finding delta for multiple revision
1313 """object capable of computing delta and finding delta for multiple revision
1318
1314
1319 This object is meant to compute and find multiple delta applied to the same
1315 This object is meant to compute and find multiple delta applied to the same
1320 revlog.
1316 revlog.
1321 """
1317 """
1322
1318
1323 def __init__(
1319 def __init__(
1324 self,
1320 self,
1325 revlog,
1321 revlog,
1326 write_debug=None,
1322 write_debug=None,
1327 debug_search=False,
1323 debug_search=False,
1328 debug_info=None,
1324 debug_info=None,
1329 ):
1325 ):
1330 self.revlog = revlog
1326 self.revlog = revlog
1331 self._write_debug = write_debug
1327 self._write_debug = write_debug
1332 if write_debug is None:
1328 if write_debug is None:
1333 self._debug_search = False
1329 self._debug_search = False
1334 else:
1330 else:
1335 self._debug_search = debug_search
1331 self._debug_search = debug_search
1336 self._debug_info = debug_info
1332 self._debug_info = debug_info
1337 self._snapshot_cache = SnapshotCache()
1333 self._snapshot_cache = SnapshotCache()
1338
1334
1339 @property
1335 @property
1340 def _gather_debug(self):
1336 def _gather_debug(self):
1341 return self._write_debug is not None or self._debug_info is not None
1337 return self._write_debug is not None or self._debug_info is not None
1342
1338
1343 def buildtext(self, revinfo):
1339 def buildtext(self, revinfo):
1344 """Builds a fulltext version of a revision
1340 """Builds a fulltext version of a revision
1345
1341
1346 revinfo: revisioninfo instance that contains all needed info
1342 revinfo: revisioninfo instance that contains all needed info
1347 """
1343 """
1348 btext = revinfo.btext
1344 btext = revinfo.btext
1349 if btext[0] is not None:
1345 if btext[0] is not None:
1350 return btext[0]
1346 return btext[0]
1351
1347
1352 revlog = self.revlog
1348 revlog = self.revlog
1353 cachedelta = revinfo.cachedelta
1349 cachedelta = revinfo.cachedelta
1354 baserev = cachedelta[0]
1350 baserev = cachedelta[0]
1355 delta = cachedelta[1]
1351 delta = cachedelta[1]
1356
1352
1357 fulltext = btext[0] = _textfromdelta(
1353 fulltext = btext[0] = _textfromdelta(
1358 revlog,
1354 revlog,
1359 baserev,
1355 baserev,
1360 delta,
1356 delta,
1361 revinfo.p1,
1357 revinfo.p1,
1362 revinfo.p2,
1358 revinfo.p2,
1363 revinfo.flags,
1359 revinfo.flags,
1364 revinfo.node,
1360 revinfo.node,
1365 )
1361 )
1366 return fulltext
1362 return fulltext
1367
1363
1368 def _builddeltadiff(self, base, revinfo):
1364 def _builddeltadiff(self, base, revinfo):
1369 revlog = self.revlog
1365 revlog = self.revlog
1370 t = self.buildtext(revinfo)
1366 t = self.buildtext(revinfo)
1371 if revlog.iscensored(base):
1367 if revlog.iscensored(base):
1372 # deltas based on a censored revision must replace the
1368 # deltas based on a censored revision must replace the
1373 # full content in one patch, so delta works everywhere
1369 # full content in one patch, so delta works everywhere
1374 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
1370 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
1375 delta = header + t
1371 delta = header + t
1376 else:
1372 else:
1377 ptext = revlog.rawdata(base)
1373 ptext = revlog.rawdata(base)
1378 delta = mdiff.textdiff(ptext, t)
1374 delta = mdiff.textdiff(ptext, t)
1379
1375
1380 return delta
1376 return delta
1381
1377
1382 def _builddeltainfo(
1378 def _builddeltainfo(
1383 self, revinfo, base, target_rev=None, as_snapshot=False
1379 self, revinfo, base, target_rev=None, as_snapshot=False
1384 ):
1380 ):
1385 # can we use the cached delta?
1381 # can we use the cached delta?
1386 revlog = self.revlog
1382 revlog = self.revlog
1387 chainbase = revlog.chainbase(base)
1383 chainbase = revlog.chainbase(base)
1388 if revlog.delta_config.general_delta:
1384 if revlog.delta_config.general_delta:
1389 deltabase = base
1385 deltabase = base
1390 else:
1386 else:
1391 if target_rev is not None and base != target_rev - 1:
1387 if target_rev is not None and base != target_rev - 1:
1392 msg = (
1388 msg = (
1393 b'general delta cannot use delta for something else '
1389 b'general delta cannot use delta for something else '
1394 b'than `prev`: %d<-%d'
1390 b'than `prev`: %d<-%d'
1395 )
1391 )
1396 msg %= (base, target_rev)
1392 msg %= (base, target_rev)
1397 raise error.ProgrammingError(msg)
1393 raise error.ProgrammingError(msg)
1398 deltabase = chainbase
1394 deltabase = chainbase
1399 snapshotdepth = None
1395 snapshotdepth = None
1400 if revlog.delta_config.sparse_revlog and deltabase == nullrev:
1396 if revlog.delta_config.sparse_revlog and deltabase == nullrev:
1401 snapshotdepth = 0
1397 snapshotdepth = 0
1402 elif revlog.delta_config.sparse_revlog and as_snapshot:
1398 elif revlog.delta_config.sparse_revlog and as_snapshot:
1403 assert revlog.issnapshot(deltabase)
1399 assert revlog.issnapshot(deltabase)
1404 # A delta chain should always be one full snapshot,
1400 # A delta chain should always be one full snapshot,
1405 # zero or more semi-snapshots, and zero or more deltas
1401 # zero or more semi-snapshots, and zero or more deltas
1406 p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
1402 p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
1407 if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
1403 if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
1408 snapshotdepth = len(revlog._deltachain(deltabase)[0])
1404 snapshotdepth = len(revlog._deltachain(deltabase)[0])
1409 delta = None
1405 delta = None
1410 if revinfo.cachedelta:
1406 if revinfo.cachedelta:
1411 cachebase = revinfo.cachedelta[0]
1407 cachebase = revinfo.cachedelta[0]
1412 # check if the diff still apply
1408 # check if the diff still apply
1413 currentbase = cachebase
1409 currentbase = cachebase
1414 while (
1410 while (
1415 currentbase != nullrev
1411 currentbase != nullrev
1416 and currentbase != base
1412 and currentbase != base
1417 and self.revlog.length(currentbase) == 0
1413 and self.revlog.length(currentbase) == 0
1418 ):
1414 ):
1419 currentbase = self.revlog.deltaparent(currentbase)
1415 currentbase = self.revlog.deltaparent(currentbase)
1420 if self.revlog.delta_config.lazy_delta and currentbase == base:
1416 if self.revlog.delta_config.lazy_delta and currentbase == base:
1421 delta = revinfo.cachedelta[1]
1417 delta = revinfo.cachedelta[1]
1422 if delta is None:
1418 if delta is None:
1423 delta = self._builddeltadiff(base, revinfo)
1419 delta = self._builddeltadiff(base, revinfo)
1424 if self._debug_search:
1420 if self._debug_search:
1425 msg = b"DBG-DELTAS-SEARCH: uncompressed-delta-size=%d\n"
1421 msg = b"DBG-DELTAS-SEARCH: uncompressed-delta-size=%d\n"
1426 msg %= len(delta)
1422 msg %= len(delta)
1427 self._write_debug(msg)
1423 self._write_debug(msg)
1428 # snapshotdept need to be neither None nor 0 level snapshot
1424 # snapshotdept need to be neither None nor 0 level snapshot
1429 if revlog.delta_config.upper_bound_comp is not None and snapshotdepth:
1425 if revlog.delta_config.upper_bound_comp is not None and snapshotdepth:
1430 lowestrealisticdeltalen = (
1426 lowestrealisticdeltalen = (
1431 len(delta) // revlog.delta_config.upper_bound_comp
1427 len(delta) // revlog.delta_config.upper_bound_comp
1432 )
1428 )
1433 snapshotlimit = revinfo.textlen >> snapshotdepth
1429 snapshotlimit = revinfo.textlen >> snapshotdepth
1434 if self._debug_search:
1430 if self._debug_search:
1435 msg = b"DBG-DELTAS-SEARCH: projected-lower-size=%d\n"
1431 msg = b"DBG-DELTAS-SEARCH: projected-lower-size=%d\n"
1436 msg %= lowestrealisticdeltalen
1432 msg %= lowestrealisticdeltalen
1437 self._write_debug(msg)
1433 self._write_debug(msg)
1438 if snapshotlimit < lowestrealisticdeltalen:
1434 if snapshotlimit < lowestrealisticdeltalen:
1439 if self._debug_search:
1435 if self._debug_search:
1440 msg = b"DBG-DELTAS-SEARCH: DISCARDED (snapshot limit)\n"
1436 msg = b"DBG-DELTAS-SEARCH: DISCARDED (snapshot limit)\n"
1441 self._write_debug(msg)
1437 self._write_debug(msg)
1442 return None
1438 return None
1443 if revlog.length(base) < lowestrealisticdeltalen:
1439 if revlog.length(base) < lowestrealisticdeltalen:
1444 if self._debug_search:
1440 if self._debug_search:
1445 msg = b"DBG-DELTAS-SEARCH: DISCARDED (prev size)\n"
1441 msg = b"DBG-DELTAS-SEARCH: DISCARDED (prev size)\n"
1446 self._write_debug(msg)
1442 self._write_debug(msg)
1447 return None
1443 return None
1448 header, data = revlog._inner.compress(delta)
1444 header, data = revlog._inner.compress(delta)
1449 deltalen = len(header) + len(data)
1445 deltalen = len(header) + len(data)
1450 offset = revlog.end(len(revlog) - 1)
1446 offset = revlog.end(len(revlog) - 1)
1451 dist = deltalen + offset - revlog.start(chainbase)
1447 dist = deltalen + offset - revlog.start(chainbase)
1452 chainlen, compresseddeltalen = revlog._chaininfo(base)
1448 chainlen, compresseddeltalen = revlog._chaininfo(base)
1453 chainlen += 1
1449 chainlen += 1
1454 compresseddeltalen += deltalen
1450 compresseddeltalen += deltalen
1455
1451
1456 return _deltainfo(
1452 return _deltainfo(
1457 dist,
1453 dist,
1458 deltalen,
1454 deltalen,
1459 (header, data),
1455 (header, data),
1460 deltabase,
1456 deltabase,
1461 chainbase,
1457 chainbase,
1462 chainlen,
1458 chainlen,
1463 compresseddeltalen,
1459 compresseddeltalen,
1464 snapshotdepth,
1460 snapshotdepth,
1465 )
1461 )
1466
1462
1467 def _fullsnapshotinfo(self, revinfo, curr):
1463 def _fullsnapshotinfo(self, revinfo, curr):
1468 rawtext = self.buildtext(revinfo)
1464 rawtext = self.buildtext(revinfo)
1469 data = self.revlog._inner.compress(rawtext)
1465 data = self.revlog._inner.compress(rawtext)
1470 compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
1466 compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
1471 deltabase = chainbase = curr
1467 deltabase = chainbase = curr
1472 snapshotdepth = 0
1468 snapshotdepth = 0
1473 chainlen = 1
1469 chainlen = 1
1474
1470
1475 return _deltainfo(
1471 return _deltainfo(
1476 dist,
1472 dist,
1477 deltalen,
1473 deltalen,
1478 data,
1474 data,
1479 deltabase,
1475 deltabase,
1480 chainbase,
1476 chainbase,
1481 chainlen,
1477 chainlen,
1482 compresseddeltalen,
1478 compresseddeltalen,
1483 snapshotdepth,
1479 snapshotdepth,
1484 )
1480 )
1485
1481
1486 def finddeltainfo(self, revinfo, excluded_bases=None, target_rev=None):
1482 def finddeltainfo(self, revinfo, excluded_bases=None, target_rev=None):
1487 """Find an acceptable delta against a candidate revision
1483 """Find an acceptable delta against a candidate revision
1488
1484
1489 revinfo: information about the revision (instance of _revisioninfo)
1485 revinfo: information about the revision (instance of _revisioninfo)
1490
1486
1491 Returns the first acceptable candidate revision, as ordered by
1487 Returns the first acceptable candidate revision, as ordered by
1492 _candidategroups
1488 _candidategroups
1493
1489
1494 If no suitable deltabase is found, we return delta info for a full
1490 If no suitable deltabase is found, we return delta info for a full
1495 snapshot.
1491 snapshot.
1496
1492
1497 `excluded_bases` is an optional set of revision that cannot be used as
1493 `excluded_bases` is an optional set of revision that cannot be used as
1498 a delta base. Use this to recompute delta suitable in censor or strip
1494 a delta base. Use this to recompute delta suitable in censor or strip
1499 context.
1495 context.
1500 """
1496 """
1501 if target_rev is None:
1497 if target_rev is None:
1502 target_rev = len(self.revlog)
1498 target_rev = len(self.revlog)
1503
1499
1504 gather_debug = self._gather_debug
1500 gather_debug = self._gather_debug
1505 cachedelta = revinfo.cachedelta
1501 cachedelta = revinfo.cachedelta
1506 revlog = self.revlog
1502 revlog = self.revlog
1507 p1r = p2r = None
1503 p1r = p2r = None
1508
1504
1509 if excluded_bases is None:
1505 if excluded_bases is None:
1510 excluded_bases = set()
1506 excluded_bases = set()
1511
1507
1512 if gather_debug:
1508 if gather_debug:
1513 start = util.timer()
1509 start = util.timer()
1514 dbg = self._one_dbg_data()
1510 dbg = self._one_dbg_data()
1515 dbg['revision'] = target_rev
1511 dbg['revision'] = target_rev
1516 p1r = revlog.rev(revinfo.p1)
1512 p1r = revlog.rev(revinfo.p1)
1517 p2r = revlog.rev(revinfo.p2)
1513 p2r = revlog.rev(revinfo.p2)
1518 if p1r != nullrev:
1514 if p1r != nullrev:
1519 p1_chain_len = revlog._chaininfo(p1r)[0]
1515 p1_chain_len = revlog._chaininfo(p1r)[0]
1520 else:
1516 else:
1521 p1_chain_len = -1
1517 p1_chain_len = -1
1522 if p2r != nullrev:
1518 if p2r != nullrev:
1523 p2_chain_len = revlog._chaininfo(p2r)[0]
1519 p2_chain_len = revlog._chaininfo(p2r)[0]
1524 else:
1520 else:
1525 p2_chain_len = -1
1521 p2_chain_len = -1
1526 dbg['p1-chain-len'] = p1_chain_len
1522 dbg['p1-chain-len'] = p1_chain_len
1527 dbg['p2-chain-len'] = p2_chain_len
1523 dbg['p2-chain-len'] = p2_chain_len
1528
1524
1529 # 1) if the revision is empty, no amount of delta can beat it
1525 # 1) if the revision is empty, no amount of delta can beat it
1530 #
1526 #
1531 # 2) no delta for flag processor revision (see "candelta" for why)
1527 # 2) no delta for flag processor revision (see "candelta" for why)
1532 # not calling candelta since only one revision needs test, also to
1528 # not calling candelta since only one revision needs test, also to
1533 # avoid overhead fetching flags again.
1529 # avoid overhead fetching flags again.
1534 if not revinfo.textlen or revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
1530 if not revinfo.textlen or revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
1535 deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
1531 deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
1536 if gather_debug:
1532 if gather_debug:
1537 end = util.timer()
1533 end = util.timer()
1538 dbg['duration'] = end - start
1534 dbg['duration'] = end - start
1539 dbg[
1535 dbg[
1540 'delta-base'
1536 'delta-base'
1541 ] = deltainfo.base # pytype: disable=attribute-error
1537 ] = deltainfo.base # pytype: disable=attribute-error
1542 dbg['search_round_count'] = 0
1538 dbg['search_round_count'] = 0
1543 dbg['using-cached-base'] = False
1539 dbg['using-cached-base'] = False
1544 dbg['delta_try_count'] = 0
1540 dbg['delta_try_count'] = 0
1545 dbg['type'] = b"full"
1541 dbg['type'] = b"full"
1546 dbg['snapshot-depth'] = 0
1542 dbg['snapshot-depth'] = 0
1547 self._dbg_process_data(dbg)
1543 self._dbg_process_data(dbg)
1548 return deltainfo
1544 return deltainfo
1549
1545
1550 deltainfo = None
1546 deltainfo = None
1551
1547
1552 # If this source delta are to be forcibly reuse, let us comply early.
1548 # If this source delta are to be forcibly reuse, let us comply early.
1553 if (
1549 if (
1554 revlog.delta_config.general_delta
1550 revlog.delta_config.general_delta
1555 and revinfo.cachedelta is not None
1551 and revinfo.cachedelta is not None
1556 and revinfo.cachedelta[2] == DELTA_BASE_REUSE_FORCE
1552 and revinfo.cachedelta[2] == DELTA_BASE_REUSE_FORCE
1557 ):
1553 ):
1558 base = revinfo.cachedelta[0]
1554 base = revinfo.cachedelta[0]
1559 if base == nullrev:
1555 if base == nullrev:
1560 dbg_type = b"full"
1556 dbg_type = b"full"
1561 deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
1557 deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
1562 if gather_debug:
1558 if gather_debug:
1563 snapshotdepth = 0
1559 snapshotdepth = 0
1564 elif base not in excluded_bases:
1560 elif base not in excluded_bases:
1565 delta = revinfo.cachedelta[1]
1561 delta = revinfo.cachedelta[1]
1566 header, data = revlog.compress(delta)
1562 header, data = revlog.compress(delta)
1567 deltalen = len(header) + len(data)
1563 deltalen = len(header) + len(data)
1568 if gather_debug:
1564 if gather_debug:
1569 offset = revlog.end(len(revlog) - 1)
1565 offset = revlog.end(len(revlog) - 1)
1570 chainbase = revlog.chainbase(base)
1566 chainbase = revlog.chainbase(base)
1571 distance = deltalen + offset - revlog.start(chainbase)
1567 distance = deltalen + offset - revlog.start(chainbase)
1572 chainlen, compresseddeltalen = revlog._chaininfo(base)
1568 chainlen, compresseddeltalen = revlog._chaininfo(base)
1573 chainlen += 1
1569 chainlen += 1
1574 compresseddeltalen += deltalen
1570 compresseddeltalen += deltalen
1575 if base == p1r or base == p2r:
1571 if base == p1r or base == p2r:
1576 dbg_type = b"delta"
1572 dbg_type = b"delta"
1577 snapshotdepth = None
1573 snapshotdepth = None
1578 elif not revlog.issnapshot(base):
1574 elif not revlog.issnapshot(base):
1579 snapshotdepth = None
1575 snapshotdepth = None
1580 else:
1576 else:
1581 dbg_type = b"snapshot"
1577 dbg_type = b"snapshot"
1582 snapshotdepth = revlog.snapshotdepth(base) + 1
1578 snapshotdepth = revlog.snapshotdepth(base) + 1
1583 else:
1579 else:
1584 distance = None
1580 distance = None
1585 chainbase = None
1581 chainbase = None
1586 chainlen = None
1582 chainlen = None
1587 compresseddeltalen = None
1583 compresseddeltalen = None
1588 snapshotdepth = None
1584 snapshotdepth = None
1589 deltainfo = _deltainfo(
1585 deltainfo = _deltainfo(
1590 distance=distance,
1586 distance=distance,
1591 deltalen=deltalen,
1587 deltalen=deltalen,
1592 data=(header, data),
1588 data=(header, data),
1593 base=base,
1589 base=base,
1594 chainbase=chainbase,
1590 chainbase=chainbase,
1595 chainlen=chainlen,
1591 chainlen=chainlen,
1596 compresseddeltalen=compresseddeltalen,
1592 compresseddeltalen=compresseddeltalen,
1597 snapshotdepth=snapshotdepth,
1593 snapshotdepth=snapshotdepth,
1598 )
1594 )
1599
1595
1600 if deltainfo is not None:
1596 if deltainfo is not None:
1601 if gather_debug:
1597 if gather_debug:
1602 end = util.timer()
1598 end = util.timer()
1603 dbg['duration'] = end - start
1599 dbg['duration'] = end - start
1604 dbg[
1600 dbg[
1605 'delta-base'
1601 'delta-base'
1606 ] = deltainfo.base # pytype: disable=attribute-error
1602 ] = deltainfo.base # pytype: disable=attribute-error
1607 dbg['search_round_count'] = 0
1603 dbg['search_round_count'] = 0
1608 dbg['using-cached-base'] = True
1604 dbg['using-cached-base'] = True
1609 dbg['delta_try_count'] = 0
1605 dbg['delta_try_count'] = 0
1610 dbg['type'] = b"full"
1606 dbg['type'] = b"full"
1611 if snapshotdepth is None:
1607 if snapshotdepth is None:
1612 dbg['snapshot-depth'] = -1
1608 dbg['snapshot-depth'] = -1
1613 else:
1609 else:
1614 dbg['snapshot-depth'] = snapshotdepth
1610 dbg['snapshot-depth'] = snapshotdepth
1615 self._dbg_process_data(dbg)
1611 self._dbg_process_data(dbg)
1616 return deltainfo
1612 return deltainfo
1617
1613
1618 # count the number of different delta we tried (for debug purpose)
1614 # count the number of different delta we tried (for debug purpose)
1619 dbg_try_count = 0
1615 dbg_try_count = 0
1620 # count the number of "search round" we did. (for debug purpose)
1616 # count the number of "search round" we did. (for debug purpose)
1621 dbg_try_rounds = 0
1617 dbg_try_rounds = 0
1622 dbg_type = b'unknown'
1618 dbg_type = b'unknown'
1623
1619
1624 if p1r is None:
1620 if p1r is None:
1625 p1r = revlog.rev(revinfo.p1)
1621 p1r = revlog.rev(revinfo.p1)
1626 p2r = revlog.rev(revinfo.p2)
1622 p2r = revlog.rev(revinfo.p2)
1627
1623
1628 if self._debug_search:
1624 if self._debug_search:
1629 msg = b"DBG-DELTAS-SEARCH: SEARCH rev=%d\n"
1625 msg = b"DBG-DELTAS-SEARCH: SEARCH rev=%d\n"
1630 msg %= target_rev
1626 msg %= target_rev
1631 self._write_debug(msg)
1627 self._write_debug(msg)
1632
1628
1633 # should we try to build a delta?
1629 # should we try to build a delta?
1634 if not (len(self.revlog) and self.revlog._storedeltachains):
1630 if not (len(self.revlog) and self.revlog._storedeltachains):
1635 search_cls = _NoDeltaSearch
1631 search_cls = _NoDeltaSearch
1636 elif self.revlog.delta_config.sparse_revlog:
1632 elif self.revlog.delta_config.sparse_revlog:
1637 search_cls = _SparseDeltaSearch
1633 search_cls = _SparseDeltaSearch
1638 elif self.revlog.delta_config.general_delta:
1634 elif self.revlog.delta_config.general_delta:
1639 search_cls = _GeneralDeltaSearch
1635 search_cls = _GeneralDeltaSearch
1640 else:
1636 else:
1641 # before general delta, there is only one possible delta base
1637 # before general delta, there is only one possible delta base
1642 search_cls = _PrevDeltaSearch
1638 search_cls = _PrevDeltaSearch
1643
1639
1644 search = search_cls(
1640 search = search_cls(
1645 self.revlog,
1641 self.revlog,
1646 revinfo,
1642 revinfo,
1647 p1r,
1643 p1r,
1648 p2r,
1644 p2r,
1649 cachedelta,
1645 cachedelta,
1650 excluded_bases,
1646 excluded_bases,
1651 target_rev,
1647 target_rev,
1652 snapshot_cache=self._snapshot_cache,
1648 snapshot_cache=self._snapshot_cache,
1653 )
1649 )
1654
1650
1655 while not search.done:
1651 while not search.done:
1656 current_group = search.current_group
1652 current_group = search.current_group
1657 # current_group can be `None`, but not is search.done is False
1653 # current_group can be `None`, but not is search.done is False
1658 # We add this assert to help pytype
1654 # We add this assert to help pytype
1659 assert current_group is not None
1655 assert current_group is not None
1660 candidaterevs = current_group
1656 candidaterevs = current_group
1661 dbg_try_rounds += 1
1657 dbg_try_rounds += 1
1662 if self._debug_search:
1658 if self._debug_search:
1663 prev = None
1659 prev = None
1664 if deltainfo is not None:
1660 if deltainfo is not None:
1665 prev = deltainfo.base
1661 prev = deltainfo.base
1666
1662
1667 if (
1663 if (
1668 cachedelta is not None
1664 cachedelta is not None
1669 and len(candidaterevs) == 1
1665 and len(candidaterevs) == 1
1670 and cachedelta[0] in candidaterevs
1666 and cachedelta[0] in candidaterevs
1671 ):
1667 ):
1672 round_type = b"cached-delta"
1668 round_type = b"cached-delta"
1673 elif p1r in candidaterevs or p2r in candidaterevs:
1669 elif p1r in candidaterevs or p2r in candidaterevs:
1674 round_type = b"parents"
1670 round_type = b"parents"
1675 elif prev is not None and all(c < prev for c in candidaterevs):
1671 elif prev is not None and all(c < prev for c in candidaterevs):
1676 round_type = b"refine-down"
1672 round_type = b"refine-down"
1677 elif prev is not None and all(c > prev for c in candidaterevs):
1673 elif prev is not None and all(c > prev for c in candidaterevs):
1678 round_type = b"refine-up"
1674 round_type = b"refine-up"
1679 else:
1675 else:
1680 round_type = b"search-down"
1676 round_type = b"search-down"
1681 msg = b"DBG-DELTAS-SEARCH: ROUND #%d - %d candidates - %s\n"
1677 msg = b"DBG-DELTAS-SEARCH: ROUND #%d - %d candidates - %s\n"
1682 msg %= (dbg_try_rounds, len(candidaterevs), round_type)
1678 msg %= (dbg_try_rounds, len(candidaterevs), round_type)
1683 self._write_debug(msg)
1679 self._write_debug(msg)
1684 nominateddeltas = []
1680 nominateddeltas = []
1685 if deltainfo is not None:
1681 if deltainfo is not None:
1686 if self._debug_search:
1682 if self._debug_search:
1687 msg = (
1683 msg = (
1688 b"DBG-DELTAS-SEARCH: CONTENDER: rev=%d - length=%d\n"
1684 b"DBG-DELTAS-SEARCH: CONTENDER: rev=%d - length=%d\n"
1689 )
1685 )
1690 msg %= (deltainfo.base, deltainfo.deltalen)
1686 msg %= (deltainfo.base, deltainfo.deltalen)
1691 self._write_debug(msg)
1687 self._write_debug(msg)
1692 # if we already found a good delta,
1688 # if we already found a good delta,
1693 # challenge it against refined candidates
1689 # challenge it against refined candidates
1694 nominateddeltas.append(deltainfo)
1690 nominateddeltas.append(deltainfo)
1695 for candidaterev in candidaterevs:
1691 for candidaterev in candidaterevs:
1696 if self._debug_search:
1692 if self._debug_search:
1697 msg = b"DBG-DELTAS-SEARCH: CANDIDATE: rev=%d\n"
1693 msg = b"DBG-DELTAS-SEARCH: CANDIDATE: rev=%d\n"
1698 msg %= candidaterev
1694 msg %= candidaterev
1699 self._write_debug(msg)
1695 self._write_debug(msg)
1700 candidate_type = None
1696 candidate_type = None
1701 if candidaterev == p1r:
1697 if candidaterev == p1r:
1702 candidate_type = b"p1"
1698 candidate_type = b"p1"
1703 elif candidaterev == p2r:
1699 elif candidaterev == p2r:
1704 candidate_type = b"p2"
1700 candidate_type = b"p2"
1705 elif self.revlog.issnapshot(candidaterev):
1701 elif self.revlog.issnapshot(candidaterev):
1706 candidate_type = b"snapshot-%d"
1702 candidate_type = b"snapshot-%d"
1707 candidate_type %= self.revlog.snapshotdepth(
1703 candidate_type %= self.revlog.snapshotdepth(
1708 candidaterev
1704 candidaterev
1709 )
1705 )
1710
1706
1711 if candidate_type is not None:
1707 if candidate_type is not None:
1712 msg = b"DBG-DELTAS-SEARCH: type=%s\n"
1708 msg = b"DBG-DELTAS-SEARCH: type=%s\n"
1713 msg %= candidate_type
1709 msg %= candidate_type
1714 self._write_debug(msg)
1710 self._write_debug(msg)
1715 msg = b"DBG-DELTAS-SEARCH: size=%d\n"
1711 msg = b"DBG-DELTAS-SEARCH: size=%d\n"
1716 msg %= self.revlog.length(candidaterev)
1712 msg %= self.revlog.length(candidaterev)
1717 self._write_debug(msg)
1713 self._write_debug(msg)
1718 msg = b"DBG-DELTAS-SEARCH: base=%d\n"
1714 msg = b"DBG-DELTAS-SEARCH: base=%d\n"
1719 msg %= self.revlog.deltaparent(candidaterev)
1715 msg %= self.revlog.deltaparent(candidaterev)
1720 self._write_debug(msg)
1716 self._write_debug(msg)
1721
1717
1722 dbg_try_count += 1
1718 dbg_try_count += 1
1723
1719
1724 if self._debug_search:
1720 if self._debug_search:
1725 delta_start = util.timer()
1721 delta_start = util.timer()
1726 candidatedelta = self._builddeltainfo(
1722 candidatedelta = self._builddeltainfo(
1727 revinfo,
1723 revinfo,
1728 candidaterev,
1724 candidaterev,
1729 target_rev=target_rev,
1725 target_rev=target_rev,
1730 as_snapshot=search.current_stage == _STAGE_SNAPSHOT,
1726 as_snapshot=search.current_stage == _STAGE_SNAPSHOT,
1731 )
1727 )
1732 if self._debug_search:
1728 if self._debug_search:
1733 delta_end = util.timer()
1729 delta_end = util.timer()
1734 msg = b"DBG-DELTAS-SEARCH: delta-search-time=%f\n"
1730 msg = b"DBG-DELTAS-SEARCH: delta-search-time=%f\n"
1735 msg %= delta_end - delta_start
1731 msg %= delta_end - delta_start
1736 self._write_debug(msg)
1732 self._write_debug(msg)
1737 if candidatedelta is not None:
1733 if candidatedelta is not None:
1738 if search.is_good_delta_info(candidatedelta):
1734 if search.is_good_delta_info(candidatedelta):
1739 if self._debug_search:
1735 if self._debug_search:
1740 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (GOOD)\n"
1736 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (GOOD)\n"
1741 msg %= candidatedelta.deltalen
1737 msg %= candidatedelta.deltalen
1742 self._write_debug(msg)
1738 self._write_debug(msg)
1743 nominateddeltas.append(candidatedelta)
1739 nominateddeltas.append(candidatedelta)
1744 elif self._debug_search:
1740 elif self._debug_search:
1745 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (BAD)\n"
1741 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (BAD)\n"
1746 msg %= candidatedelta.deltalen
1742 msg %= candidatedelta.deltalen
1747 self._write_debug(msg)
1743 self._write_debug(msg)
1748 elif self._debug_search:
1744 elif self._debug_search:
1749 msg = b"DBG-DELTAS-SEARCH: NO-DELTA\n"
1745 msg = b"DBG-DELTAS-SEARCH: NO-DELTA\n"
1750 self._write_debug(msg)
1746 self._write_debug(msg)
1751 if nominateddeltas:
1747 if nominateddeltas:
1752 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
1748 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
1753 search.next_group(deltainfo)
1749 search.next_group(deltainfo)
1754
1750
1755 if deltainfo is None:
1751 if deltainfo is None:
1756 dbg_type = b"full"
1752 dbg_type = b"full"
1757 deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
1753 deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
1758 elif deltainfo.snapshotdepth: # pytype: disable=attribute-error
1754 elif deltainfo.snapshotdepth: # pytype: disable=attribute-error
1759 dbg_type = b"snapshot"
1755 dbg_type = b"snapshot"
1760 else:
1756 else:
1761 dbg_type = b"delta"
1757 dbg_type = b"delta"
1762
1758
1763 if gather_debug:
1759 if gather_debug:
1764 end = util.timer()
1760 end = util.timer()
1765 if dbg_type == b'full':
1761 if dbg_type == b'full':
1766 used_cached = (
1762 used_cached = (
1767 cachedelta is not None
1763 cachedelta is not None
1768 and dbg_try_rounds == 0
1764 and dbg_try_rounds == 0
1769 and dbg_try_count == 0
1765 and dbg_try_count == 0
1770 and cachedelta[0] == nullrev
1766 and cachedelta[0] == nullrev
1771 )
1767 )
1772 else:
1768 else:
1773 used_cached = (
1769 used_cached = (
1774 cachedelta is not None
1770 cachedelta is not None
1775 and dbg_try_rounds == 1
1771 and dbg_try_rounds == 1
1776 and dbg_try_count == 1
1772 and dbg_try_count == 1
1777 and deltainfo.base == cachedelta[0]
1773 and deltainfo.base == cachedelta[0]
1778 )
1774 )
1779 dbg['duration'] = end - start
1775 dbg['duration'] = end - start
1780 dbg[
1776 dbg[
1781 'delta-base'
1777 'delta-base'
1782 ] = deltainfo.base # pytype: disable=attribute-error
1778 ] = deltainfo.base # pytype: disable=attribute-error
1783 dbg['search_round_count'] = dbg_try_rounds
1779 dbg['search_round_count'] = dbg_try_rounds
1784 dbg['using-cached-base'] = used_cached
1780 dbg['using-cached-base'] = used_cached
1785 dbg['delta_try_count'] = dbg_try_count
1781 dbg['delta_try_count'] = dbg_try_count
1786 dbg['type'] = dbg_type
1782 dbg['type'] = dbg_type
1787 if (
1783 if (
1788 deltainfo.snapshotdepth # pytype: disable=attribute-error
1784 deltainfo.snapshotdepth # pytype: disable=attribute-error
1789 is not None
1785 is not None
1790 ):
1786 ):
1791 dbg[
1787 dbg[
1792 'snapshot-depth'
1788 'snapshot-depth'
1793 ] = deltainfo.snapshotdepth # pytype: disable=attribute-error
1789 ] = deltainfo.snapshotdepth # pytype: disable=attribute-error
1794 else:
1790 else:
1795 dbg['snapshot-depth'] = -1
1791 dbg['snapshot-depth'] = -1
1796 self._dbg_process_data(dbg)
1792 self._dbg_process_data(dbg)
1797 return deltainfo
1793 return deltainfo
1798
1794
1799 def _one_dbg_data(self):
1795 def _one_dbg_data(self):
1800 dbg = {
1796 dbg = {
1801 'duration': None,
1797 'duration': None,
1802 'revision': None,
1798 'revision': None,
1803 'delta-base': None,
1799 'delta-base': None,
1804 'search_round_count': None,
1800 'search_round_count': None,
1805 'using-cached-base': None,
1801 'using-cached-base': None,
1806 'delta_try_count': None,
1802 'delta_try_count': None,
1807 'type': None,
1803 'type': None,
1808 'p1-chain-len': None,
1804 'p1-chain-len': None,
1809 'p2-chain-len': None,
1805 'p2-chain-len': None,
1810 'snapshot-depth': None,
1806 'snapshot-depth': None,
1811 'target-revlog': None,
1807 'target-revlog': None,
1812 }
1808 }
1813 target_revlog = b"UNKNOWN"
1809 target_revlog = b"UNKNOWN"
1814 target_type = self.revlog.target[0]
1810 target_type = self.revlog.target[0]
1815 target_key = self.revlog.target[1]
1811 target_key = self.revlog.target[1]
1816 if target_type == KIND_CHANGELOG:
1812 if target_type == KIND_CHANGELOG:
1817 target_revlog = b'CHANGELOG:'
1813 target_revlog = b'CHANGELOG:'
1818 elif target_type == KIND_MANIFESTLOG:
1814 elif target_type == KIND_MANIFESTLOG:
1819 target_revlog = b'MANIFESTLOG:'
1815 target_revlog = b'MANIFESTLOG:'
1820 if target_key:
1816 if target_key:
1821 target_revlog += b'%s:' % target_key
1817 target_revlog += b'%s:' % target_key
1822 elif target_type == KIND_FILELOG:
1818 elif target_type == KIND_FILELOG:
1823 target_revlog = b'FILELOG:'
1819 target_revlog = b'FILELOG:'
1824 if target_key:
1820 if target_key:
1825 target_revlog += b'%s:' % target_key
1821 target_revlog += b'%s:' % target_key
1826 dbg['target-revlog'] = target_revlog
1822 dbg['target-revlog'] = target_revlog
1827 return dbg
1823 return dbg
1828
1824
1829 def _dbg_process_data(self, dbg):
1825 def _dbg_process_data(self, dbg):
1830 if self._debug_info is not None:
1826 if self._debug_info is not None:
1831 self._debug_info.append(dbg)
1827 self._debug_info.append(dbg)
1832
1828
1833 if self._write_debug is not None:
1829 if self._write_debug is not None:
1834 msg = (
1830 msg = (
1835 b"DBG-DELTAS:"
1831 b"DBG-DELTAS:"
1836 b" %-12s"
1832 b" %-12s"
1837 b" rev=%d:"
1833 b" rev=%d:"
1838 b" delta-base=%d"
1834 b" delta-base=%d"
1839 b" is-cached=%d"
1835 b" is-cached=%d"
1840 b" - search-rounds=%d"
1836 b" - search-rounds=%d"
1841 b" try-count=%d"
1837 b" try-count=%d"
1842 b" - delta-type=%-6s"
1838 b" - delta-type=%-6s"
1843 b" snap-depth=%d"
1839 b" snap-depth=%d"
1844 b" - p1-chain-length=%d"
1840 b" - p1-chain-length=%d"
1845 b" p2-chain-length=%d"
1841 b" p2-chain-length=%d"
1846 b" - duration=%f"
1842 b" - duration=%f"
1847 b"\n"
1843 b"\n"
1848 )
1844 )
1849 msg %= (
1845 msg %= (
1850 dbg["target-revlog"],
1846 dbg["target-revlog"],
1851 dbg["revision"],
1847 dbg["revision"],
1852 dbg["delta-base"],
1848 dbg["delta-base"],
1853 dbg["using-cached-base"],
1849 dbg["using-cached-base"],
1854 dbg["search_round_count"],
1850 dbg["search_round_count"],
1855 dbg["delta_try_count"],
1851 dbg["delta_try_count"],
1856 dbg["type"],
1852 dbg["type"],
1857 dbg["snapshot-depth"],
1853 dbg["snapshot-depth"],
1858 dbg["p1-chain-len"],
1854 dbg["p1-chain-len"],
1859 dbg["p2-chain-len"],
1855 dbg["p2-chain-len"],
1860 dbg["duration"],
1856 dbg["duration"],
1861 )
1857 )
1862 self._write_debug(msg)
1858 self._write_debug(msg)
1863
1859
1864
1860
1865 def delta_compression(default_compression_header, deltainfo):
1861 def delta_compression(default_compression_header, deltainfo):
1866 """return (COMPRESSION_MODE, deltainfo)
1862 """return (COMPRESSION_MODE, deltainfo)
1867
1863
1868 used by revlog v2+ format to dispatch between PLAIN and DEFAULT
1864 used by revlog v2+ format to dispatch between PLAIN and DEFAULT
1869 compression.
1865 compression.
1870 """
1866 """
1871 h, d = deltainfo.data
1867 h, d = deltainfo.data
1872 compression_mode = COMP_MODE_INLINE
1868 compression_mode = COMP_MODE_INLINE
1873 if not h and not d:
1869 if not h and not d:
1874 # not data to store at all... declare them uncompressed
1870 # not data to store at all... declare them uncompressed
1875 compression_mode = COMP_MODE_PLAIN
1871 compression_mode = COMP_MODE_PLAIN
1876 elif not h:
1872 elif not h:
1877 t = d[0:1]
1873 t = d[0:1]
1878 if t == b'\0':
1874 if t == b'\0':
1879 compression_mode = COMP_MODE_PLAIN
1875 compression_mode = COMP_MODE_PLAIN
1880 elif t == default_compression_header:
1876 elif t == default_compression_header:
1881 compression_mode = COMP_MODE_DEFAULT
1877 compression_mode = COMP_MODE_DEFAULT
1882 elif h == b'u':
1878 elif h == b'u':
1883 # we have a more efficient way to declare uncompressed
1879 # we have a more efficient way to declare uncompressed
1884 h = b''
1880 h = b''
1885 compression_mode = COMP_MODE_PLAIN
1881 compression_mode = COMP_MODE_PLAIN
1886 deltainfo = drop_u_compression(deltainfo)
1882 deltainfo = drop_u_compression(deltainfo)
1887 return compression_mode, deltainfo
1883 return compression_mode, deltainfo
General Comments 0
You need to be logged in to leave comments. Login now