##// END OF EJS Templates
delta-find: split the "sparse" part of `_pre_filter_rev` in a method...
marmoute -
r52232:9a1239c3 default
parent child Browse files
Show More
@@ -1,1722 +1,1734 b''
1 # revlogdeltas.py - Logic around delta computation for revlog
1 # revlogdeltas.py - Logic around delta computation for revlog
2 #
2 #
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 # Copyright 2018 Octobus <contact@octobus.net>
4 # Copyright 2018 Octobus <contact@octobus.net>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8 """Helper class to compute deltas stored inside revlogs"""
8 """Helper class to compute deltas stored inside revlogs"""
9
9
10
10
11 import collections
11 import collections
12 import struct
12 import struct
13
13
14 # import stuff from node for others to import from revlog
14 # import stuff from node for others to import from revlog
15 from ..node import nullrev
15 from ..node import nullrev
16 from ..i18n import _
16 from ..i18n import _
17
17
18 from .constants import (
18 from .constants import (
19 COMP_MODE_DEFAULT,
19 COMP_MODE_DEFAULT,
20 COMP_MODE_INLINE,
20 COMP_MODE_INLINE,
21 COMP_MODE_PLAIN,
21 COMP_MODE_PLAIN,
22 DELTA_BASE_REUSE_FORCE,
22 DELTA_BASE_REUSE_FORCE,
23 DELTA_BASE_REUSE_NO,
23 DELTA_BASE_REUSE_NO,
24 KIND_CHANGELOG,
24 KIND_CHANGELOG,
25 KIND_FILELOG,
25 KIND_FILELOG,
26 KIND_MANIFESTLOG,
26 KIND_MANIFESTLOG,
27 REVIDX_ISCENSORED,
27 REVIDX_ISCENSORED,
28 REVIDX_RAWTEXT_CHANGING_FLAGS,
28 REVIDX_RAWTEXT_CHANGING_FLAGS,
29 )
29 )
30
30
31 from ..thirdparty import attr
31 from ..thirdparty import attr
32
32
33 from .. import (
33 from .. import (
34 error,
34 error,
35 mdiff,
35 mdiff,
36 util,
36 util,
37 )
37 )
38
38
39 from . import flagutil
39 from . import flagutil
40
40
41 # maximum <delta-chain-data>/<revision-text-length> ratio
41 # maximum <delta-chain-data>/<revision-text-length> ratio
42 LIMIT_DELTA2TEXT = 2
42 LIMIT_DELTA2TEXT = 2
43
43
44
44
class _testrevlog:
    """Minimalist fake revlog used by the doctests in this module."""

    def __init__(self, data, density=0.5, mingap=0, snapshot=()):
        """data is an list of revision payload boundaries"""
        from .. import revlog

        self._data = data
        self.data_config = revlog.DataConfig()
        self.data_config.sr_density_threshold = density
        self.data_config.sr_min_gap_size = mingap
        self.delta_config = revlog.DeltaConfig()
        self.feature_config = revlog.FeatureConfig()
        self._snapshot = set(snapshot)
        self.index = None

    def start(self, rev):
        # both nullrev and the first revision start at offset zero
        if rev in (nullrev, 0):
            return 0
        return self._data[rev - 1]

    def end(self, rev):
        return 0 if rev == nullrev else self._data[rev]

    def length(self, rev):
        return self.end(rev) - self.start(rev)

    def __len__(self):
        return len(self._data)

    def issnapshot(self, rev):
        # nullrev is always treated as a snapshot
        return rev == nullrev or rev in self._snapshot
83
83
84
84
def slicechunk(revlog, revs, targetsize=None):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `revlog.data_config.sr_density_threshold`. No gap smaller
    than `revlog.data_config.sr_min_gap_size` is skipped.

    If `targetsize` is set, no chunk larger than `targetsize` will be yield.
    For consistency with other slicing choice, this limit won't go lower than
    `revlog.data_config.sr_min_gap_size`.

    If individual revisions chunk are larger than this limit, they will still
    be raised individually.

    >>> data = [
    ...  5,  #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ]
    >>> revlog = _testrevlog(data, snapshot=range(16))

    >>> list(slicechunk(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(slicechunk(revlog, [0, 15]))
    [[0], [15]]
    >>> list(slicechunk(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]

    Slicing with a maximum chunk size
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
    [[0], [11], [13], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
    [[0], [11], [13, 15]]

    Slicing involving nullrev
    >>> list(slicechunk(revlog, [-1, 0, 11, 13, 15], targetsize=20))
    [[-1, 0], [11], [13, 15]]
    >>> list(slicechunk(revlog, [-1, 13, 15], targetsize=5))
    [[-1], [13], [15]]
    """
    if targetsize is not None:
        targetsize = max(targetsize, revlog.data_config.sr_min_gap_size)
    # targetsize should not be specified when evaluating delta candidates:
    # * targetsize is used to ensure we stay within specification when reading,
    densityslicing = getattr(revlog.index, 'slicechunktodensity', None)
    if densityslicing is None:
        # the index has no native implementation; fall back to the pure
        # Python one defined in this module
        def densityslicing(revs, density, mingap):
            return _slicechunktodensity(revlog, revs, density, mingap)

    density_chunks = densityslicing(
        revs,
        revlog.data_config.sr_density_threshold,
        revlog.data_config.sr_min_gap_size,
    )
    # each density-selected chunk may still exceed targetsize; slice further
    for chunk in density_chunks:
        yield from _slicechunktosize(revlog, chunk, targetsize)
159
159
160
160
def _slicechunktosize(revlog, revs, targetsize=None):
    """slice revs to match the target size

    This is intended to be used on chunk that density slicing selected by that
    are still too large compared to the read garantee of revlog. This might
    happens when "minimal gap size" interrupted the slicing or when chain are
    built in a way that create large blocks next to each other.

    >>> data = [
    ...  3,  #0 (3)
    ...  5,  #1 (2)
    ...  6,  #2 (1)
    ...  8,  #3 (2)
    ...  8,  #4 (empty)
    ...  11, #5 (3)
    ...  12, #6 (1)
    ...  13, #7 (1)
    ...  14, #8 (1)
    ... ]

    == All snapshots cases ==
    >>> revlog = _testrevlog(data, snapshot=range(9))

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1, 2], [3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]

    == No Snapshot cases ==
    >>> revlog = _testrevlog(data)

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1], [2, 3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [4, 5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]

    == mixed case ==
    >>> revlog = _testrevlog(data, snapshot=[0, 1, 2])
    >>> list(_slicechunktosize(revlog, list(range(9)), 5))
    [[0, 1], [2], [3, 4, 5], [6, 7, 8]]
    """
    assert targetsize is None or 0 <= targetsize
    startdata = revlog.start(revs[0])
    enddata = revlog.end(revs[-1])
    fullspan = enddata - startdata
    # fast path: the whole chunk already fits within the target size
    if targetsize is None or fullspan <= targetsize:
        yield revs
        return

    startrevidx = 0
    endrevidx = 1
    iterrevs = enumerate(revs)
    next(iterrevs)  # skip first rev.
    # first step: get snapshots out of the way
    # (grow the current chunk while revisions are snapshots and still fit;
    # flush the accumulated chunk as soon as one does not)
    for idx, r in iterrevs:
        span = revlog.end(r) - startdata
        snapshot = revlog.issnapshot(r)
        if span <= targetsize and snapshot:
            endrevidx = idx + 1
        else:
            chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
            if chunk:
                yield chunk
            # restart accumulation from the current revision
            startrevidx = idx
            startdata = revlog.start(r)
            endrevidx = idx + 1
        if not snapshot:
            # first non-snapshot revision reached; hand over to phase two
            break

    # for the others, we use binary slicing to quickly converge toward valid
    # chunks (otherwise, we might end up looking for start/end of many
    # revisions). This logic is not looking for the perfect slicing point, it
    # focuses on quickly converging toward valid chunks.
    nbitem = len(revs)
    while (enddata - startdata) > targetsize:
        endrevidx = nbitem
        if nbitem - startrevidx <= 1:
            break  # protect against individual chunk larger than limit
        localenddata = revlog.end(revs[endrevidx - 1])
        span = localenddata - startdata
        # halve the remaining window until the candidate chunk fits
        while span > targetsize:
            if endrevidx - startrevidx <= 1:
                break  # protect against individual chunk larger than limit
            endrevidx -= (endrevidx - startrevidx) // 2
            localenddata = revlog.end(revs[endrevidx - 1])
            span = localenddata - startdata
        chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
        if chunk:
            yield chunk
        startrevidx = endrevidx
        startdata = revlog.start(revs[startrevidx])

    # emit whatever remains after the last cut
    chunk = _trimchunk(revlog, revs, startrevidx)
    if chunk:
        yield chunk
314
314
315
315
def _slicechunktodensity(revlog, revs, targetdensity=0.5, mingapsize=0):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
    skipped.

    >>> revlog = _testrevlog([
    ...  5,  #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ])

    >>> list(_slicechunktodensity(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(_slicechunktodensity(revlog, [0, 15]))
    [[0], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           mingapsize=20))
    [[1, 2, 3, 5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95))
    [[1, 2], [5], [8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95, mingapsize=12))
    [[1, 2], [5, 8, 10, 11], [14]]
    """
    start = revlog.start
    length = revlog.length

    # a single revision (or nothing) cannot be sliced further
    if len(revs) <= 1:
        yield revs
        return

    deltachainspan = segmentspan(revlog, revs)

    # a span below the minimum gap size is never worth splitting
    if deltachainspan < mingapsize:
        yield revs
        return

    readdata = deltachainspan
    chainpayload = sum(length(r) for r in revs)

    if deltachainspan:
        density = chainpayload / float(deltachainspan)
    else:
        density = 1.0

    if density >= targetdensity:
        yield revs
        return

    # Collect the (size, index) of the gaps between consecutive non-empty
    # revisions; the list is later sorted so the largest gaps pop first
    gaps = []
    prevend = None
    for i, rev in enumerate(revs):
        revstart = start(rev)
        revlen = length(rev)

        # Skip empty revisions to form larger holes
        if revlen == 0:
            continue

        if prevend is not None:
            gapsize = revstart - prevend
            # only consider holes that are large enough
            if gapsize > mingapsize:
                gaps.append((gapsize, i))

        prevend = revstart + revlen
    # sort the gaps to pop them from largest to small
    gaps.sort()

    # Collect the indices of the largest holes until the density is acceptable
    selected = []
    while gaps and density < targetdensity:
        # pop from the end of the (ascending) sorted list: largest gap first
        gapsize, gapidx = gaps.pop()

        selected.append(gapidx)

        # skipping this gap shrinks the amount of data actually read,
        # raising the effective density
        readdata -= gapsize
        if readdata > 0:
            density = chainpayload / float(readdata)
        else:
            density = 1.0
    selected.sort()

    # Cut the revs at collected indices
    previdx = 0
    for idx in selected:

        chunk = _trimchunk(revlog, revs, previdx, idx)
        if chunk:
            yield chunk

        previdx = idx

    # emit the final slice after the last selected cut point
    chunk = _trimchunk(revlog, revs, previdx)
    if chunk:
        yield chunk
440
440
441
441
442 def _trimchunk(revlog, revs, startidx, endidx=None):
442 def _trimchunk(revlog, revs, startidx, endidx=None):
443 """returns revs[startidx:endidx] without empty trailing revs
443 """returns revs[startidx:endidx] without empty trailing revs
444
444
445 Doctest Setup
445 Doctest Setup
446 >>> revlog = _testrevlog([
446 >>> revlog = _testrevlog([
447 ... 5, #0
447 ... 5, #0
448 ... 10, #1
448 ... 10, #1
449 ... 12, #2
449 ... 12, #2
450 ... 12, #3 (empty)
450 ... 12, #3 (empty)
451 ... 17, #4
451 ... 17, #4
452 ... 21, #5
452 ... 21, #5
453 ... 21, #6 (empty)
453 ... 21, #6 (empty)
454 ... ])
454 ... ])
455
455
456 Contiguous cases:
456 Contiguous cases:
457 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
457 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
458 [0, 1, 2, 3, 4, 5]
458 [0, 1, 2, 3, 4, 5]
459 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
459 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
460 [0, 1, 2, 3, 4]
460 [0, 1, 2, 3, 4]
461 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
461 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
462 [0, 1, 2]
462 [0, 1, 2]
463 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
463 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
464 [2]
464 [2]
465 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
465 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
466 [3, 4, 5]
466 [3, 4, 5]
467 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
467 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
468 [3, 4]
468 [3, 4]
469
469
470 Discontiguous cases:
470 Discontiguous cases:
471 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
471 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
472 [1, 3, 5]
472 [1, 3, 5]
473 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
473 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
474 [1]
474 [1]
475 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
475 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
476 [3, 5]
476 [3, 5]
477 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
477 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
478 [3, 5]
478 [3, 5]
479 """
479 """
480 length = revlog.length
480 length = revlog.length
481
481
482 if endidx is None:
482 if endidx is None:
483 endidx = len(revs)
483 endidx = len(revs)
484
484
485 # If we have a non-emtpy delta candidate, there are nothing to trim
485 # If we have a non-emtpy delta candidate, there are nothing to trim
486 if revs[endidx - 1] < len(revlog):
486 if revs[endidx - 1] < len(revlog):
487 # Trim empty revs at the end, except the very first revision of a chain
487 # Trim empty revs at the end, except the very first revision of a chain
488 while (
488 while (
489 endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0
489 endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0
490 ):
490 ):
491 endidx -= 1
491 endidx -= 1
492
492
493 return revs[startidx:endidx]
493 return revs[startidx:endidx]
494
494
495
495
def segmentspan(revlog, revs):
    """Get the byte span of a segment of revisions

    revs is a sorted array of revision numbers

    >>> revlog = _testrevlog([
    ...  5,  #0
    ...  10, #1
    ...  12, #2
    ...  12, #3 (empty)
    ...  17, #4
    ... ])

    >>> segmentspan(revlog, [0, 1, 2, 3, 4])
    17
    >>> segmentspan(revlog, [0, 4])
    17
    >>> segmentspan(revlog, [3, 4])
    5
    >>> segmentspan(revlog, [1, 2, 3,])
    7
    >>> segmentspan(revlog, [1, 3])
    7
    """
    # an empty segment spans nothing
    if not revs:
        return 0
    first_byte = revlog.start(revs[0])
    last_byte = revlog.end(revs[-1])
    return last_byte - first_byte
524
524
525
525
def _textfromdelta(revlog, baserev, delta, p1, p2, flags, expectednode):
    """build full text from a (base, delta) pair and other metadata

    Applies ``delta`` on top of revision ``baserev`` of ``revlog``, runs the
    raw flag processors and, when they request it, validates the result
    against ``expectednode`` using parents ``p1``/``p2``.

    Raises error.StorageError when the REVIDX_ISCENSORED flag is set but the
    content validated as non-censored, and re-raises CensoredNodeError when
    censored content is built without that flag. Returns the full text.
    """
    # special case deltas which replace entire base; no need to decode
    # base revision. this neatly avoids censored bases, which throw when
    # they're decoded.
    hlen = struct.calcsize(b">lll")
    if delta[:hlen] == mdiff.replacediffheader(
        revlog.rawsize(baserev), len(delta) - hlen
    ):
        # full-replacement delta: the payload after the header is the text
        fulltext = delta[hlen:]
    else:
        # deltabase is rawtext before changed by flag processors, which is
        # equivalent to non-raw text
        basetext = revlog.revision(baserev)
        fulltext = mdiff.patch(basetext, delta)

    try:
        validatehash = flagutil.processflagsraw(revlog, fulltext, flags)
        if validatehash:
            revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
        if flags & REVIDX_ISCENSORED:
            # reaching this point with the censored flag set means the
            # checks above did not detect censorship: the flag is wrong
            raise error.StorageError(
                _(b'node %s is not censored') % expectednode
            )
    except error.CensoredNodeError:
        # must pass the censored index flag to add censored revisions
        if not flags & REVIDX_ISCENSORED:
            raise
    return fulltext
555
555
556
556
557 @attr.s(slots=True, frozen=True)
557 @attr.s(slots=True, frozen=True)
558 class _deltainfo:
558 class _deltainfo:
559 distance = attr.ib()
559 distance = attr.ib()
560 deltalen = attr.ib()
560 deltalen = attr.ib()
561 data = attr.ib()
561 data = attr.ib()
562 base = attr.ib()
562 base = attr.ib()
563 chainbase = attr.ib()
563 chainbase = attr.ib()
564 chainlen = attr.ib()
564 chainlen = attr.ib()
565 compresseddeltalen = attr.ib()
565 compresseddeltalen = attr.ib()
566 snapshotdepth = attr.ib()
566 snapshotdepth = attr.ib()
567
567
568
568
569 def drop_u_compression(delta):
569 def drop_u_compression(delta):
570 """turn into a "u" (no-compression) into no-compression without header
570 """turn into a "u" (no-compression) into no-compression without header
571
571
572 This is useful for revlog format that has better compression method.
572 This is useful for revlog format that has better compression method.
573 """
573 """
574 assert delta.data[0] == b'u', delta.data[0]
574 assert delta.data[0] == b'u', delta.data[0]
575 return _deltainfo(
575 return _deltainfo(
576 delta.distance,
576 delta.distance,
577 delta.deltalen - 1,
577 delta.deltalen - 1,
578 (b'', delta.data[1]),
578 (b'', delta.data[1]),
579 delta.base,
579 delta.base,
580 delta.chainbase,
580 delta.chainbase,
581 delta.chainlen,
581 delta.chainlen,
582 delta.compresseddeltalen,
582 delta.compresseddeltalen,
583 delta.snapshotdepth,
583 delta.snapshotdepth,
584 )
584 )
585
585
586
586
587 # If a revision's full text is that much bigger than a base candidate full
587 # If a revision's full text is that much bigger than a base candidate full
588 # text's, it is very unlikely that it will produce a valid delta. We no longer
588 # text's, it is very unlikely that it will produce a valid delta. We no longer
589 # consider these candidates.
589 # consider these candidates.
590 LIMIT_BASE2TEXT = 500
590 LIMIT_BASE2TEXT = 500
591
591
592
592
593 class _DeltaSearch:
593 class _DeltaSearch:
594 """perform the search of a good delta for a single revlog revision
594 """perform the search of a good delta for a single revlog revision
595
595
596 note: some of the deltacomputer.finddeltainfo logic should probably move
596 note: some of the deltacomputer.finddeltainfo logic should probably move
597 here.
597 here.
598 """
598 """
599
599
600 def __init__(
600 def __init__(
601 self,
601 self,
602 revlog,
602 revlog,
603 revinfo,
603 revinfo,
604 p1,
604 p1,
605 p2,
605 p2,
606 cachedelta,
606 cachedelta,
607 excluded_bases=None,
607 excluded_bases=None,
608 target_rev=None,
608 target_rev=None,
609 snapshot_cache=None,
609 snapshot_cache=None,
610 ):
610 ):
611 # the DELTA_BASE_REUSE_FORCE case should have been taken care of sooner
611 # the DELTA_BASE_REUSE_FORCE case should have been taken care of sooner
612 # so we should never end up asking such question. Adding the assert as
612 # so we should never end up asking such question. Adding the assert as
613 # a safe-guard to detect anything that would be fishy in this regard.
613 # a safe-guard to detect anything that would be fishy in this regard.
614 assert (
614 assert (
615 cachedelta is None
615 cachedelta is None
616 or cachedelta[2] != DELTA_BASE_REUSE_FORCE
616 or cachedelta[2] != DELTA_BASE_REUSE_FORCE
617 or not revlog.delta_config.general_delta
617 or not revlog.delta_config.general_delta
618 )
618 )
619 self.revlog = revlog
619 self.revlog = revlog
620 self.revinfo = revinfo
620 self.revinfo = revinfo
621 self.textlen = revinfo.textlen
621 self.textlen = revinfo.textlen
622 self.p1 = p1
622 self.p1 = p1
623 self.p2 = p2
623 self.p2 = p2
624 self.cachedelta = cachedelta
624 self.cachedelta = cachedelta
625 self.excluded_bases = excluded_bases
625 self.excluded_bases = excluded_bases
626 if target_rev is None:
626 if target_rev is None:
627 self.target_rev = len(self.revlog)
627 self.target_rev = len(self.revlog)
628 self.target_rev = target_rev
628 self.target_rev = target_rev
629 if snapshot_cache is None:
629 if snapshot_cache is None:
630 # map: base-rev: [snapshot-revs]
630 # map: base-rev: [snapshot-revs]
631 snapshot_cache = SnapshotCache()
631 snapshot_cache = SnapshotCache()
632 self.snapshot_cache = snapshot_cache
632 self.snapshot_cache = snapshot_cache
633
633
634 self.tested = {nullrev}
634 self.tested = {nullrev}
635
635
636 self._candidates_iterator = self._candidate_groups()
636 self._candidates_iterator = self._candidate_groups()
637 self._last_good = None
637 self._last_good = None
638 self.current_group = self._candidates_iterator.send(self._last_good)
638 self.current_group = self._candidates_iterator.send(self._last_good)
639
639
640 @property
640 @property
641 def done(self):
641 def done(self):
642 """True when all possible candidate have been tested"""
642 """True when all possible candidate have been tested"""
643 return self.current_group is None
643 return self.current_group is None
644
644
645 def next_group(self, good_delta=None):
645 def next_group(self, good_delta=None):
646 """move to the next group to test
646 """move to the next group to test
647
647
648 The group of revision to test will be available in
648 The group of revision to test will be available in
649 `self.current_group`. If the previous group had any good delta, the
649 `self.current_group`. If the previous group had any good delta, the
650 best one can be passed as the `good_delta` parameter to help selecting
650 best one can be passed as the `good_delta` parameter to help selecting
651 the next group.
651 the next group.
652
652
653 If not revision remains to be, `self.done` will be True and
653 If not revision remains to be, `self.done` will be True and
654 `self.current_group` will be None.
654 `self.current_group` will be None.
655 """
655 """
656 if good_delta is not None:
656 if good_delta is not None:
657 self._last_good = good_delta.base
657 self._last_good = good_delta.base
658 self.current_group = self._candidates_iterator.send(self._last_good)
658 self.current_group = self._candidates_iterator.send(self._last_good)
659
659
660 def _candidate_groups(self):
660 def _candidate_groups(self):
661 """Provides group of revision to be tested as delta base
661 """Provides group of revision to be tested as delta base
662
662
663 This top level function focus on emitting groups with unique and
663 This top level function focus on emitting groups with unique and
664 worthwhile content. See _raw_candidate_groups for details about the
664 worthwhile content. See _raw_candidate_groups for details about the
665 group order.
665 group order.
666 """
666 """
667 # should we try to build a delta?
667 # should we try to build a delta?
668 if not (len(self.revlog) and self.revlog._storedeltachains):
668 if not (len(self.revlog) and self.revlog._storedeltachains):
669 yield None
669 yield None
670 return
670 return
671
671
672 if not self.revlog.delta_config.general_delta:
672 if not self.revlog.delta_config.general_delta:
673 # before general delta, there is only one possible delta base
673 # before general delta, there is only one possible delta base
674 yield (self.target_rev - 1,)
674 yield (self.target_rev - 1,)
675 yield None
675 yield None
676 return
676 return
677
677
678 good = None
678 good = None
679
679
680 group_chunk_size = self.revlog.delta_config.candidate_group_chunk_size
680 group_chunk_size = self.revlog.delta_config.candidate_group_chunk_size
681
681
682 tested = self.tested # prefetch for speed and code compactness
682 tested = self.tested # prefetch for speed and code compactness
683 candidates = self._refined_groups()
683 candidates = self._refined_groups()
684 while True:
684 while True:
685 temptative = candidates.send(good)
685 temptative = candidates.send(good)
686 if temptative is None:
686 if temptative is None:
687 break
687 break
688 group = self._pre_filter_candidate_revs(temptative)
688 group = self._pre_filter_candidate_revs(temptative)
689 if group:
689 if group:
690 # When the size of the candidate group is big, it can result in
690 # When the size of the candidate group is big, it can result in
691 # a quite significant performance impact. To reduce this, we
691 # a quite significant performance impact. To reduce this, we
692 # can send them in smaller batches until the new batch does not
692 # can send them in smaller batches until the new batch does not
693 # provide any improvements.
693 # provide any improvements.
694 #
694 #
695 # This might reduce the overall efficiency of the compression
695 # This might reduce the overall efficiency of the compression
696 # in some corner cases, but that should also prevent very
696 # in some corner cases, but that should also prevent very
697 # pathological cases from being an issue. (eg. 20 000
697 # pathological cases from being an issue. (eg. 20 000
698 # candidates).
698 # candidates).
699 #
699 #
700 # XXX note that the ordering of the group becomes important as
700 # XXX note that the ordering of the group becomes important as
701 # it now impacts the final result. The current order is
701 # it now impacts the final result. The current order is
702 # unprocessed and can be improved.
702 # unprocessed and can be improved.
703 if group_chunk_size == 0:
703 if group_chunk_size == 0:
704 tested.update(group)
704 tested.update(group)
705 good = yield tuple(group)
705 good = yield tuple(group)
706 else:
706 else:
707 prev_good = good
707 prev_good = good
708 for start in range(0, len(group), group_chunk_size):
708 for start in range(0, len(group), group_chunk_size):
709 sub_group = group[start : start + group_chunk_size]
709 sub_group = group[start : start + group_chunk_size]
710 tested.update(sub_group)
710 tested.update(sub_group)
711 good = yield tuple(sub_group)
711 good = yield tuple(sub_group)
712 if prev_good == good:
712 if prev_good == good:
713 break
713 break
714
714
715 yield None
715 yield None
716
716
717 def _pre_filter_candidate_revs(self, temptative):
717 def _pre_filter_candidate_revs(self, temptative):
718 """filter possible candidate before computing a delta
718 """filter possible candidate before computing a delta
719
719
720 This function use various criteria to pre-filter candidate delta base
720 This function use various criteria to pre-filter candidate delta base
721 before we compute a delta and evaluate its quality.
721 before we compute a delta and evaluate its quality.
722
722
723 Such pre-filter limit the number of computed delta, an expensive operation.
723 Such pre-filter limit the number of computed delta, an expensive operation.
724
724
725 return the updated list of revision to test
725 return the updated list of revision to test
726 """
726 """
727 deltalength = self.revlog.length
727 deltalength = self.revlog.length
728 deltaparent = self.revlog.deltaparent
728 deltaparent = self.revlog.deltaparent
729
729
730 tested = self.tested
730 tested = self.tested
731 group = []
731 group = []
732 for rev in temptative:
732 for rev in temptative:
733 # skip over empty delta (no need to include them in a chain)
733 # skip over empty delta (no need to include them in a chain)
734 while not (rev == nullrev or rev in tested or deltalength(rev)):
734 while not (rev == nullrev or rev in tested or deltalength(rev)):
735 tested.add(rev)
735 tested.add(rev)
736 rev = deltaparent(rev)
736 rev = deltaparent(rev)
737 if self._pre_filter_rev(rev):
737 if self._pre_filter_rev(rev):
738 group.append(rev)
738 group.append(rev)
739 else:
739 else:
740 self.tested.add(rev)
740 self.tested.add(rev)
741 return group
741 return group
742
742
743 def _pre_filter_rev_universal(self, rev):
743 def _pre_filter_rev_universal(self, rev):
744 """pre filtering that is need in all cases.
744 """pre filtering that is need in all cases.
745
745
746 return True if it seems okay to test a rev, False otherwise.
746 return True if it seems okay to test a rev, False otherwise.
747
747
748 used by _pre_filter_rev.
748 used by _pre_filter_rev.
749 """
749 """
750 # no need to try a delta against nullrev, this will be done as
750 # no need to try a delta against nullrev, this will be done as
751 # a last resort.
751 # a last resort.
752 if rev == nullrev:
752 if rev == nullrev:
753 return False
753 return False
754 # filter out revision we tested already
754 # filter out revision we tested already
755 if rev in self.tested:
755 if rev in self.tested:
756 return False
756 return False
757
757
758 # an higher authority deamed the base unworthy (e.g. censored)
758 # an higher authority deamed the base unworthy (e.g. censored)
759 if self.excluded_bases is not None and rev in self.excluded_bases:
759 if self.excluded_bases is not None and rev in self.excluded_bases:
760 return False
760 return False
761 # We are in some recomputation cases and that rev is too high
761 # We are in some recomputation cases and that rev is too high
762 # in the revlog
762 # in the revlog
763 if self.target_rev is not None and rev >= self.target_rev:
763 if self.target_rev is not None and rev >= self.target_rev:
764 return False
764 return False
765 # no delta for rawtext-changing revs (see "candelta" for why)
765 # no delta for rawtext-changing revs (see "candelta" for why)
766 if self.revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
766 if self.revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
767 return False
767 return False
768 return True
768 return True
769
769
770 def _pre_filter_rev(self, rev):
770 def _pre_filter_rev_sparse(self, rev):
771 """return True if it seems okay to test a rev, False otherwise"""
771 """pre filtering that is needed in sparse revlog cases
772 if not self._pre_filter_rev_universal(rev):
773 return False
774
772
775 deltas_limit = self.revinfo.textlen * LIMIT_DELTA2TEXT
773 return True if it seems okay to test a rev, False otherwise.
776 # filter out delta base that will never produce good delta
777 #
778 # if the delta of that base is already bigger than the limit
779 # for the delta chain size, doing a delta is hopeless.
780 if deltas_limit < self.revlog.length(rev):
781 return False
782
774
783 sparse = self.revlog.delta_config.sparse_revlog
775 used by _pre_filter_rev.
776 """
777 assert self.revlog.delta_config.sparse_revlog
784 # if the revision we test again is too small, the resulting delta
778 # if the revision we test again is too small, the resulting delta
785 # will be large anyway as that amount of data to be added is big
779 # will be large anyway as that amount of data to be added is big
786 if sparse and self.revlog.rawsize(rev) < (
780 if self.revlog.rawsize(rev) < (self.textlen // LIMIT_BASE2TEXT):
787 self.textlen // LIMIT_BASE2TEXT
788 ):
789 return False
781 return False
790
782
791 # If we reach here, we are about to build and test a delta.
783 if self.revlog.delta_config.upper_bound_comp is not None:
792 # The delta building process will compute the chaininfo in all
793 # case, since that computation is cached, it is fine to access
794 # it here too.
795 chainlen, chainsize = self.revlog._chaininfo(rev)
796 # if chain will be too long, skip base
797 if (
798 self.revlog.delta_config.max_chain_len
799 and chainlen >= self.revlog.delta_config.max_chain_len
800 ):
801 return False
802 # if chain already have too much data, skip base
803 if deltas_limit < chainsize:
804 return False
805 if sparse and self.revlog.delta_config.upper_bound_comp is not None:
806 maxcomp = self.revlog.delta_config.upper_bound_comp
784 maxcomp = self.revlog.delta_config.upper_bound_comp
807 basenotsnap = (self.p1, self.p2, nullrev)
785 basenotsnap = (self.p1, self.p2, nullrev)
808 if rev not in basenotsnap and self.revlog.issnapshot(rev):
786 if rev not in basenotsnap and self.revlog.issnapshot(rev):
809 snapshotdepth = self.revlog.snapshotdepth(rev)
787 snapshotdepth = self.revlog.snapshotdepth(rev)
810 # If text is significantly larger than the base, we can
788 # If text is significantly larger than the base, we can
811 # expect the resulting delta to be proportional to the size
789 # expect the resulting delta to be proportional to the size
812 # difference
790 # difference
813 revsize = self.revlog.rawsize(rev)
791 revsize = self.revlog.rawsize(rev)
814 rawsizedistance = max(self.textlen - revsize, 0)
792 rawsizedistance = max(self.textlen - revsize, 0)
815 # use an estimate of the compression upper bound.
793 # use an estimate of the compression upper bound.
816 lowestrealisticdeltalen = rawsizedistance // maxcomp
794 lowestrealisticdeltalen = rawsizedistance // maxcomp
817
795
818 # check the absolute constraint on the delta size
796 # check the absolute constraint on the delta size
819 snapshotlimit = self.textlen >> snapshotdepth
797 snapshotlimit = self.textlen >> snapshotdepth
820 if snapshotlimit < lowestrealisticdeltalen:
798 if snapshotlimit < lowestrealisticdeltalen:
821 # delta lower bound is larger than accepted upper
799 # delta lower bound is larger than accepted upper
822 # bound
800 # bound
823 return False
801 return False
824
802
825 # check the relative constraint on the delta size
803 # check the relative constraint on the delta size
826 revlength = self.revlog.length(rev)
804 revlength = self.revlog.length(rev)
827 if revlength < lowestrealisticdeltalen:
805 if revlength < lowestrealisticdeltalen:
828 # delta probable lower bound is larger than target
806 # delta probable lower bound is larger than target
829 # base
807 # base
830 return False
808 return False
831 return True
809 return True
832
810
811 def _pre_filter_rev(self, rev):
812 """return True if it seems okay to test a rev, False otherwise"""
813 if not self._pre_filter_rev_universal(rev):
814 return False
815
816 deltas_limit = self.revinfo.textlen * LIMIT_DELTA2TEXT
817 # filter out delta base that will never produce good delta
818 #
819 # if the delta of that base is already bigger than the limit
820 # for the delta chain size, doing a delta is hopeless.
821 if deltas_limit < self.revlog.length(rev):
822 return False
823
824 # If we reach here, we are about to build and test a delta.
825 # The delta building process will compute the chaininfo in all
826 # case, since that computation is cached, it is fine to access
827 # it here too.
828 chainlen, chainsize = self.revlog._chaininfo(rev)
829 # if chain will be too long, skip base
830 if (
831 self.revlog.delta_config.max_chain_len
832 and chainlen >= self.revlog.delta_config.max_chain_len
833 ):
834 return False
835 # if chain already have too much data, skip base
836 if deltas_limit < chainsize:
837 return False
838
839 if self.revlog.delta_config.sparse_revlog:
840 if not self._pre_filter_rev_sparse(rev):
841 return False
842
843 return True
844
833 def _refined_groups(self):
845 def _refined_groups(self):
834 good = None
846 good = None
835 # First we try to reuse a the delta contained in the bundle. (or from
847 # First we try to reuse a the delta contained in the bundle. (or from
836 # the source revlog)
848 # the source revlog)
837 #
849 #
838 # This logic only applies to general delta repositories and can be
850 # This logic only applies to general delta repositories and can be
839 # disabled through configuration. Disabling reuse source delta is
851 # disabled through configuration. Disabling reuse source delta is
840 # useful when we want to make sure we recomputed "optimal" deltas.
852 # useful when we want to make sure we recomputed "optimal" deltas.
841 debug_info = None
853 debug_info = None
842 if (
854 if (
843 self.cachedelta is not None
855 self.cachedelta is not None
844 and self.cachedelta[2] > DELTA_BASE_REUSE_NO
856 and self.cachedelta[2] > DELTA_BASE_REUSE_NO
845 ):
857 ):
846 # Assume what we received from the server is a good choice
858 # Assume what we received from the server is a good choice
847 # build delta will reuse the cache
859 # build delta will reuse the cache
848 if debug_info is not None:
860 if debug_info is not None:
849 debug_info['cached-delta.tested'] += 1
861 debug_info['cached-delta.tested'] += 1
850 good = yield (self.cachedelta[0],)
862 good = yield (self.cachedelta[0],)
851 if good is not None:
863 if good is not None:
852 if debug_info is not None:
864 if debug_info is not None:
853 debug_info['cached-delta.accepted'] += 1
865 debug_info['cached-delta.accepted'] += 1
854 yield None
866 yield None
855 return
867 return
856 groups = self._raw_groups()
868 groups = self._raw_groups()
857 for candidates in groups:
869 for candidates in groups:
858 good = yield candidates
870 good = yield candidates
859 if good is not None:
871 if good is not None:
860 break
872 break
861
873
862 # If sparse revlog is enabled, we can try to refine the available
874 # If sparse revlog is enabled, we can try to refine the available
863 # deltas
875 # deltas
864 if not self.revlog.delta_config.sparse_revlog:
876 if not self.revlog.delta_config.sparse_revlog:
865 yield None
877 yield None
866 return
878 return
867
879
868 # if we have a refinable value, try to refine it
880 # if we have a refinable value, try to refine it
869 if (
881 if (
870 good is not None
882 good is not None
871 and good not in (self.p1, self.p2)
883 and good not in (self.p1, self.p2)
872 and self.revlog.issnapshot(good)
884 and self.revlog.issnapshot(good)
873 ):
885 ):
874 # refine snapshot down
886 # refine snapshot down
875 previous = None
887 previous = None
876 while previous != good:
888 while previous != good:
877 previous = good
889 previous = good
878 base = self.revlog.deltaparent(good)
890 base = self.revlog.deltaparent(good)
879 if base == nullrev:
891 if base == nullrev:
880 break
892 break
881 good = yield (base,)
893 good = yield (base,)
882 # refine snapshot up
894 # refine snapshot up
883 if not self.snapshot_cache.snapshots:
895 if not self.snapshot_cache.snapshots:
884 self.snapshot_cache.update(self.revlog, good + 1)
896 self.snapshot_cache.update(self.revlog, good + 1)
885 previous = None
897 previous = None
886 while good != previous:
898 while good != previous:
887 previous = good
899 previous = good
888 children = tuple(
900 children = tuple(
889 sorted(c for c in self.snapshot_cache.snapshots[good])
901 sorted(c for c in self.snapshot_cache.snapshots[good])
890 )
902 )
891 good = yield children
903 good = yield children
892
904
893 if debug_info is not None:
905 if debug_info is not None:
894 if good is None:
906 if good is None:
895 debug_info['no-solution'] += 1
907 debug_info['no-solution'] += 1
896
908
897 yield None
909 yield None
898
910
899 def _raw_groups(self):
911 def _raw_groups(self):
900 """Provides group of revision to be tested as delta base
912 """Provides group of revision to be tested as delta base
901
913
902 This lower level function focus on emitting delta theorically
914 This lower level function focus on emitting delta theorically
903 interresting without looking it any practical details.
915 interresting without looking it any practical details.
904
916
905 The group order aims at providing fast or small candidates first.
917 The group order aims at providing fast or small candidates first.
906 """
918 """
907 # Why search for delta base if we cannot use a delta base ?
919 # Why search for delta base if we cannot use a delta base ?
908 assert self.revlog.delta_config.general_delta
920 assert self.revlog.delta_config.general_delta
909 # also see issue6056
921 # also see issue6056
910 sparse = self.revlog.delta_config.sparse_revlog
922 sparse = self.revlog.delta_config.sparse_revlog
911 prev = self.target_rev - 1
923 prev = self.target_rev - 1
912 deltachain = lambda rev: self.revlog._deltachain(rev)[0]
924 deltachain = lambda rev: self.revlog._deltachain(rev)[0]
913
925
914 # exclude already lazy tested base if any
926 # exclude already lazy tested base if any
915 parents = [p for p in (self.p1, self.p2) if p != nullrev]
927 parents = [p for p in (self.p1, self.p2) if p != nullrev]
916
928
917 if (
929 if (
918 not self.revlog.delta_config.delta_both_parents
930 not self.revlog.delta_config.delta_both_parents
919 and len(parents) == 2
931 and len(parents) == 2
920 ):
932 ):
921 parents.sort()
933 parents.sort()
922 # To minimize the chance of having to build a fulltext,
934 # To minimize the chance of having to build a fulltext,
923 # pick first whichever parent is closest to us (max rev)
935 # pick first whichever parent is closest to us (max rev)
924 yield (parents[1],)
936 yield (parents[1],)
925 # then the other one (min rev) if the first did not fit
937 # then the other one (min rev) if the first did not fit
926 yield (parents[0],)
938 yield (parents[0],)
927 elif len(parents) > 0:
939 elif len(parents) > 0:
928 # Test all parents (1 or 2), and keep the best candidate
940 # Test all parents (1 or 2), and keep the best candidate
929 yield parents
941 yield parents
930
942
931 if sparse and parents:
943 if sparse and parents:
932 # See if we can use an existing snapshot in the parent chains to
944 # See if we can use an existing snapshot in the parent chains to
933 # use as a base for a new intermediate-snapshot
945 # use as a base for a new intermediate-snapshot
934 #
946 #
935 # search for snapshot in parents delta chain map: snapshot-level:
947 # search for snapshot in parents delta chain map: snapshot-level:
936 # snapshot-rev
948 # snapshot-rev
937 parents_snaps = collections.defaultdict(set)
949 parents_snaps = collections.defaultdict(set)
938 candidate_chains = [deltachain(p) for p in parents]
950 candidate_chains = [deltachain(p) for p in parents]
939 for chain in candidate_chains:
951 for chain in candidate_chains:
940 for idx, s in enumerate(chain):
952 for idx, s in enumerate(chain):
941 if not self.revlog.issnapshot(s):
953 if not self.revlog.issnapshot(s):
942 break
954 break
943 parents_snaps[idx].add(s)
955 parents_snaps[idx].add(s)
944 snapfloor = min(parents_snaps[0]) + 1
956 snapfloor = min(parents_snaps[0]) + 1
945 self.snapshot_cache.update(self.revlog, snapfloor)
957 self.snapshot_cache.update(self.revlog, snapfloor)
946 # search for the highest "unrelated" revision
958 # search for the highest "unrelated" revision
947 #
959 #
948 # Adding snapshots used by "unrelated" revision increase the odd we
960 # Adding snapshots used by "unrelated" revision increase the odd we
949 # reuse an independant, yet better snapshot chain.
961 # reuse an independant, yet better snapshot chain.
950 #
962 #
951 # XXX instead of building a set of revisions, we could lazily
963 # XXX instead of building a set of revisions, we could lazily
952 # enumerate over the chains. That would be more efficient, however
964 # enumerate over the chains. That would be more efficient, however
953 # we stick to simple code for now.
965 # we stick to simple code for now.
954 all_revs = set()
966 all_revs = set()
955 for chain in candidate_chains:
967 for chain in candidate_chains:
956 all_revs.update(chain)
968 all_revs.update(chain)
957 other = None
969 other = None
958 for r in self.revlog.revs(prev, snapfloor):
970 for r in self.revlog.revs(prev, snapfloor):
959 if r not in all_revs:
971 if r not in all_revs:
960 other = r
972 other = r
961 break
973 break
962 if other is not None:
974 if other is not None:
963 # To avoid unfair competition, we won't use unrelated
975 # To avoid unfair competition, we won't use unrelated
964 # intermediate snapshot that are deeper than the ones from the
976 # intermediate snapshot that are deeper than the ones from the
965 # parent delta chain.
977 # parent delta chain.
966 max_depth = max(parents_snaps.keys())
978 max_depth = max(parents_snaps.keys())
967 chain = deltachain(other)
979 chain = deltachain(other)
968 for depth, s in enumerate(chain):
980 for depth, s in enumerate(chain):
969 if s < snapfloor:
981 if s < snapfloor:
970 continue
982 continue
971 if max_depth < depth:
983 if max_depth < depth:
972 break
984 break
973 if not self.revlog.issnapshot(s):
985 if not self.revlog.issnapshot(s):
974 break
986 break
975 parents_snaps[depth].add(s)
987 parents_snaps[depth].add(s)
976 # Test them as possible intermediate snapshot base We test them
988 # Test them as possible intermediate snapshot base We test them
977 # from highest to lowest level. High level one are more likely to
989 # from highest to lowest level. High level one are more likely to
978 # result in small delta
990 # result in small delta
979 floor = None
991 floor = None
980 for idx, snaps in sorted(parents_snaps.items(), reverse=True):
992 for idx, snaps in sorted(parents_snaps.items(), reverse=True):
981 siblings = set()
993 siblings = set()
982 for s in snaps:
994 for s in snaps:
983 siblings.update(self.snapshot_cache.snapshots[s])
995 siblings.update(self.snapshot_cache.snapshots[s])
984 # Before considering making a new intermediate snapshot, we
996 # Before considering making a new intermediate snapshot, we
985 # check if an existing snapshot, children of base we consider,
997 # check if an existing snapshot, children of base we consider,
986 # would be suitable.
998 # would be suitable.
987 #
999 #
988 # It give a change to reuse a delta chain "unrelated" to the
1000 # It give a change to reuse a delta chain "unrelated" to the
989 # current revision instead of starting our own. Without such
1001 # current revision instead of starting our own. Without such
990 # re-use, topological branches would keep reopening new chains.
1002 # re-use, topological branches would keep reopening new chains.
991 # Creating more and more snapshot as the repository grow.
1003 # Creating more and more snapshot as the repository grow.
992
1004
993 if floor is not None:
1005 if floor is not None:
994 # We only do this for siblings created after the one in our
1006 # We only do this for siblings created after the one in our
995 # parent's delta chain. Those created before has less
1007 # parent's delta chain. Those created before has less
996 # chances to be valid base since our ancestors had to
1008 # chances to be valid base since our ancestors had to
997 # create a new snapshot.
1009 # create a new snapshot.
998 siblings = [r for r in siblings if floor < r]
1010 siblings = [r for r in siblings if floor < r]
999 yield tuple(sorted(siblings))
1011 yield tuple(sorted(siblings))
1000 # then test the base from our parent's delta chain.
1012 # then test the base from our parent's delta chain.
1001 yield tuple(sorted(snaps))
1013 yield tuple(sorted(snaps))
1002 floor = min(snaps)
1014 floor = min(snaps)
1003 # No suitable base found in the parent chain, search if any full
1015 # No suitable base found in the parent chain, search if any full
1004 # snapshots emitted since parent's base would be a suitable base
1016 # snapshots emitted since parent's base would be a suitable base
1005 # for an intermediate snapshot.
1017 # for an intermediate snapshot.
1006 #
1018 #
1007 # It give a chance to reuse a delta chain unrelated to the current
1019 # It give a chance to reuse a delta chain unrelated to the current
1008 # revisions instead of starting our own. Without such re-use,
1020 # revisions instead of starting our own. Without such re-use,
1009 # topological branches would keep reopening new full chains.
1021 # topological branches would keep reopening new full chains.
1010 # Creating more and more snapshot as the repository grow.
1022 # Creating more and more snapshot as the repository grow.
1011 full = [
1023 full = [
1012 r
1024 r
1013 for r in self.snapshot_cache.snapshots[nullrev]
1025 for r in self.snapshot_cache.snapshots[nullrev]
1014 if snapfloor <= r
1026 if snapfloor <= r
1015 ]
1027 ]
1016 yield tuple(sorted(full))
1028 yield tuple(sorted(full))
1017
1029
1018 if not sparse:
1030 if not sparse:
1019 # other approach failed try against prev to hopefully save us a
1031 # other approach failed try against prev to hopefully save us a
1020 # fulltext.
1032 # fulltext.
1021 yield (prev,)
1033 yield (prev,)
1022
1034
1023 def is_good_delta_info(self, deltainfo):
1035 def is_good_delta_info(self, deltainfo):
1024 """Returns True if the given delta is good. Good means that it is
1036 """Returns True if the given delta is good. Good means that it is
1025 within the disk span, disk size, and chain length bounds that we know
1037 within the disk span, disk size, and chain length bounds that we know
1026 to be performant."""
1038 to be performant."""
1027 if deltainfo is None:
1039 if deltainfo is None:
1028 return False
1040 return False
1029
1041
1030 # the DELTA_BASE_REUSE_FORCE case should have been taken care of sooner
1042 # the DELTA_BASE_REUSE_FORCE case should have been taken care of sooner
1031 # so we should never end up asking such question. Adding the assert as
1043 # so we should never end up asking such question. Adding the assert as
1032 # a safe-guard to detect anything that would be fishy in this regard.
1044 # a safe-guard to detect anything that would be fishy in this regard.
1033 assert (
1045 assert (
1034 self.revinfo.cachedelta is None
1046 self.revinfo.cachedelta is None
1035 or self.revinfo.cachedelta[2] != DELTA_BASE_REUSE_FORCE
1047 or self.revinfo.cachedelta[2] != DELTA_BASE_REUSE_FORCE
1036 or not self.revlog.delta_config.general_delta
1048 or not self.revlog.delta_config.general_delta
1037 )
1049 )
1038
1050
1039 # - 'deltainfo.distance' is the distance from the base revision --
1051 # - 'deltainfo.distance' is the distance from the base revision --
1040 # bounding it limits the amount of I/O we need to do.
1052 # bounding it limits the amount of I/O we need to do.
1041 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
1053 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
1042 # deltas we need to apply -- bounding it limits the amount of CPU
1054 # deltas we need to apply -- bounding it limits the amount of CPU
1043 # we consume.
1055 # we consume.
1044
1056
1045 textlen = self.revinfo.textlen
1057 textlen = self.revinfo.textlen
1046 defaultmax = textlen * 4
1058 defaultmax = textlen * 4
1047 maxdist = self.revlog.delta_config.max_deltachain_span
1059 maxdist = self.revlog.delta_config.max_deltachain_span
1048 if not maxdist:
1060 if not maxdist:
1049 maxdist = deltainfo.distance # ensure the conditional pass
1061 maxdist = deltainfo.distance # ensure the conditional pass
1050 maxdist = max(maxdist, defaultmax)
1062 maxdist = max(maxdist, defaultmax)
1051
1063
1052 # Bad delta from read span:
1064 # Bad delta from read span:
1053 #
1065 #
1054 # If the span of data read is larger than the maximum allowed.
1066 # If the span of data read is larger than the maximum allowed.
1055 #
1067 #
1056 # In the sparse-revlog case, we rely on the associated "sparse
1068 # In the sparse-revlog case, we rely on the associated "sparse
1057 # reading" to avoid issue related to the span of data. In theory, it
1069 # reading" to avoid issue related to the span of data. In theory, it
1058 # would be possible to build pathological revlog where delta pattern
1070 # would be possible to build pathological revlog where delta pattern
1059 # would lead to too many reads. However, they do not happen in
1071 # would lead to too many reads. However, they do not happen in
1060 # practice at all. So we skip the span check entirely.
1072 # practice at all. So we skip the span check entirely.
1061 if (
1073 if (
1062 not self.revlog.delta_config.sparse_revlog
1074 not self.revlog.delta_config.sparse_revlog
1063 and maxdist < deltainfo.distance
1075 and maxdist < deltainfo.distance
1064 ):
1076 ):
1065 return False
1077 return False
1066
1078
1067 # Bad delta from new delta size:
1079 # Bad delta from new delta size:
1068 #
1080 #
1069 # If the delta size is larger than the target text, storing the delta
1081 # If the delta size is larger than the target text, storing the delta
1070 # will be inefficient.
1082 # will be inefficient.
1071 if textlen < deltainfo.deltalen:
1083 if textlen < deltainfo.deltalen:
1072 return False
1084 return False
1073
1085
1074 # Bad delta from cumulated payload size:
1086 # Bad delta from cumulated payload size:
1075 #
1087 #
1076 # If the sum of delta get larger than K * target text length.
1088 # If the sum of delta get larger than K * target text length.
1077 if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
1089 if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
1078 return False
1090 return False
1079
1091
1080 # Bad delta from chain length:
1092 # Bad delta from chain length:
1081 #
1093 #
1082 # If the number of delta in the chain gets too high.
1094 # If the number of delta in the chain gets too high.
1083 if (
1095 if (
1084 self.revlog.delta_config.max_chain_len
1096 self.revlog.delta_config.max_chain_len
1085 and self.revlog.delta_config.max_chain_len < deltainfo.chainlen
1097 and self.revlog.delta_config.max_chain_len < deltainfo.chainlen
1086 ):
1098 ):
1087 return False
1099 return False
1088
1100
1089 # bad delta from intermediate snapshot size limit
1101 # bad delta from intermediate snapshot size limit
1090 #
1102 #
1091 # If an intermediate snapshot size is higher than the limit. The
1103 # If an intermediate snapshot size is higher than the limit. The
1092 # limit exist to prevent endless chain of intermediate delta to be
1104 # limit exist to prevent endless chain of intermediate delta to be
1093 # created.
1105 # created.
1094 if (
1106 if (
1095 deltainfo.snapshotdepth is not None
1107 deltainfo.snapshotdepth is not None
1096 and (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen
1108 and (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen
1097 ):
1109 ):
1098 return False
1110 return False
1099
1111
1100 # bad delta if new intermediate snapshot is larger than the previous
1112 # bad delta if new intermediate snapshot is larger than the previous
1101 # snapshot
1113 # snapshot
1102 if (
1114 if (
1103 deltainfo.snapshotdepth
1115 deltainfo.snapshotdepth
1104 and self.revlog.length(deltainfo.base) < deltainfo.deltalen
1116 and self.revlog.length(deltainfo.base) < deltainfo.deltalen
1105 ):
1117 ):
1106 return False
1118 return False
1107
1119
1108 return True
1120 return True
1109
1121
1110
1122
1111 class SnapshotCache:
1123 class SnapshotCache:
1112 __slots__ = ('snapshots', '_start_rev', '_end_rev')
1124 __slots__ = ('snapshots', '_start_rev', '_end_rev')
1113
1125
1114 def __init__(self):
1126 def __init__(self):
1115 self.snapshots = collections.defaultdict(set)
1127 self.snapshots = collections.defaultdict(set)
1116 self._start_rev = None
1128 self._start_rev = None
1117 self._end_rev = None
1129 self._end_rev = None
1118
1130
1119 def update(self, revlog, start_rev=0):
1131 def update(self, revlog, start_rev=0):
1120 """find snapshots from start_rev to tip"""
1132 """find snapshots from start_rev to tip"""
1121 nb_revs = len(revlog)
1133 nb_revs = len(revlog)
1122 end_rev = nb_revs - 1
1134 end_rev = nb_revs - 1
1123 if start_rev > end_rev:
1135 if start_rev > end_rev:
1124 return # range is empty
1136 return # range is empty
1125
1137
1126 if self._start_rev is None:
1138 if self._start_rev is None:
1127 assert self._end_rev is None
1139 assert self._end_rev is None
1128 self._update(revlog, start_rev, end_rev)
1140 self._update(revlog, start_rev, end_rev)
1129 elif not (self._start_rev <= start_rev and end_rev <= self._end_rev):
1141 elif not (self._start_rev <= start_rev and end_rev <= self._end_rev):
1130 if start_rev < self._start_rev:
1142 if start_rev < self._start_rev:
1131 self._update(revlog, start_rev, self._start_rev - 1)
1143 self._update(revlog, start_rev, self._start_rev - 1)
1132 if self._end_rev < end_rev:
1144 if self._end_rev < end_rev:
1133 self._update(revlog, self._end_rev + 1, end_rev)
1145 self._update(revlog, self._end_rev + 1, end_rev)
1134
1146
1135 if self._start_rev is None:
1147 if self._start_rev is None:
1136 assert self._end_rev is None
1148 assert self._end_rev is None
1137 self._end_rev = end_rev
1149 self._end_rev = end_rev
1138 self._start_rev = start_rev
1150 self._start_rev = start_rev
1139 else:
1151 else:
1140 self._start_rev = min(self._start_rev, start_rev)
1152 self._start_rev = min(self._start_rev, start_rev)
1141 self._end_rev = max(self._end_rev, end_rev)
1153 self._end_rev = max(self._end_rev, end_rev)
1142 assert self._start_rev <= self._end_rev, (
1154 assert self._start_rev <= self._end_rev, (
1143 self._start_rev,
1155 self._start_rev,
1144 self._end_rev,
1156 self._end_rev,
1145 )
1157 )
1146
1158
1147 def _update(self, revlog, start_rev, end_rev):
1159 def _update(self, revlog, start_rev, end_rev):
1148 """internal method that actually do update content"""
1160 """internal method that actually do update content"""
1149 assert self._start_rev is None or (
1161 assert self._start_rev is None or (
1150 start_rev < self._start_rev or start_rev > self._end_rev
1162 start_rev < self._start_rev or start_rev > self._end_rev
1151 ), (self._start_rev, self._end_rev, start_rev, end_rev)
1163 ), (self._start_rev, self._end_rev, start_rev, end_rev)
1152 assert self._start_rev is None or (
1164 assert self._start_rev is None or (
1153 end_rev < self._start_rev or end_rev > self._end_rev
1165 end_rev < self._start_rev or end_rev > self._end_rev
1154 ), (self._start_rev, self._end_rev, start_rev, end_rev)
1166 ), (self._start_rev, self._end_rev, start_rev, end_rev)
1155 cache = self.snapshots
1167 cache = self.snapshots
1156 if hasattr(revlog.index, 'findsnapshots'):
1168 if hasattr(revlog.index, 'findsnapshots'):
1157 revlog.index.findsnapshots(cache, start_rev, end_rev)
1169 revlog.index.findsnapshots(cache, start_rev, end_rev)
1158 else:
1170 else:
1159 deltaparent = revlog.deltaparent
1171 deltaparent = revlog.deltaparent
1160 issnapshot = revlog.issnapshot
1172 issnapshot = revlog.issnapshot
1161 for rev in revlog.revs(start_rev, end_rev):
1173 for rev in revlog.revs(start_rev, end_rev):
1162 if issnapshot(rev):
1174 if issnapshot(rev):
1163 cache[deltaparent(rev)].add(rev)
1175 cache[deltaparent(rev)].add(rev)
1164
1176
1165
1177
1166 class deltacomputer:
1178 class deltacomputer:
1167 """object capable of computing delta and finding delta for multiple revision
1179 """object capable of computing delta and finding delta for multiple revision
1168
1180
1169 This object is meant to compute and find multiple delta applied to the same
1181 This object is meant to compute and find multiple delta applied to the same
1170 revlog.
1182 revlog.
1171 """
1183 """
1172
1184
1173 def __init__(
1185 def __init__(
1174 self,
1186 self,
1175 revlog,
1187 revlog,
1176 write_debug=None,
1188 write_debug=None,
1177 debug_search=False,
1189 debug_search=False,
1178 debug_info=None,
1190 debug_info=None,
1179 ):
1191 ):
1180 self.revlog = revlog
1192 self.revlog = revlog
1181 self._write_debug = write_debug
1193 self._write_debug = write_debug
1182 if write_debug is None:
1194 if write_debug is None:
1183 self._debug_search = False
1195 self._debug_search = False
1184 else:
1196 else:
1185 self._debug_search = debug_search
1197 self._debug_search = debug_search
1186 self._debug_info = debug_info
1198 self._debug_info = debug_info
1187 self._snapshot_cache = SnapshotCache()
1199 self._snapshot_cache = SnapshotCache()
1188
1200
1189 @property
1201 @property
1190 def _gather_debug(self):
1202 def _gather_debug(self):
1191 return self._write_debug is not None or self._debug_info is not None
1203 return self._write_debug is not None or self._debug_info is not None
1192
1204
1193 def buildtext(self, revinfo):
1205 def buildtext(self, revinfo):
1194 """Builds a fulltext version of a revision
1206 """Builds a fulltext version of a revision
1195
1207
1196 revinfo: revisioninfo instance that contains all needed info
1208 revinfo: revisioninfo instance that contains all needed info
1197 """
1209 """
1198 btext = revinfo.btext
1210 btext = revinfo.btext
1199 if btext[0] is not None:
1211 if btext[0] is not None:
1200 return btext[0]
1212 return btext[0]
1201
1213
1202 revlog = self.revlog
1214 revlog = self.revlog
1203 cachedelta = revinfo.cachedelta
1215 cachedelta = revinfo.cachedelta
1204 baserev = cachedelta[0]
1216 baserev = cachedelta[0]
1205 delta = cachedelta[1]
1217 delta = cachedelta[1]
1206
1218
1207 fulltext = btext[0] = _textfromdelta(
1219 fulltext = btext[0] = _textfromdelta(
1208 revlog,
1220 revlog,
1209 baserev,
1221 baserev,
1210 delta,
1222 delta,
1211 revinfo.p1,
1223 revinfo.p1,
1212 revinfo.p2,
1224 revinfo.p2,
1213 revinfo.flags,
1225 revinfo.flags,
1214 revinfo.node,
1226 revinfo.node,
1215 )
1227 )
1216 return fulltext
1228 return fulltext
1217
1229
1218 def _builddeltadiff(self, base, revinfo):
1230 def _builddeltadiff(self, base, revinfo):
1219 revlog = self.revlog
1231 revlog = self.revlog
1220 t = self.buildtext(revinfo)
1232 t = self.buildtext(revinfo)
1221 if revlog.iscensored(base):
1233 if revlog.iscensored(base):
1222 # deltas based on a censored revision must replace the
1234 # deltas based on a censored revision must replace the
1223 # full content in one patch, so delta works everywhere
1235 # full content in one patch, so delta works everywhere
1224 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
1236 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
1225 delta = header + t
1237 delta = header + t
1226 else:
1238 else:
1227 ptext = revlog.rawdata(base)
1239 ptext = revlog.rawdata(base)
1228 delta = mdiff.textdiff(ptext, t)
1240 delta = mdiff.textdiff(ptext, t)
1229
1241
1230 return delta
1242 return delta
1231
1243
1232 def _builddeltainfo(self, revinfo, base, target_rev=None):
1244 def _builddeltainfo(self, revinfo, base, target_rev=None):
1233 # can we use the cached delta?
1245 # can we use the cached delta?
1234 revlog = self.revlog
1246 revlog = self.revlog
1235 chainbase = revlog.chainbase(base)
1247 chainbase = revlog.chainbase(base)
1236 if revlog.delta_config.general_delta:
1248 if revlog.delta_config.general_delta:
1237 deltabase = base
1249 deltabase = base
1238 else:
1250 else:
1239 if target_rev is not None and base != target_rev - 1:
1251 if target_rev is not None and base != target_rev - 1:
1240 msg = (
1252 msg = (
1241 b'general delta cannot use delta for something else '
1253 b'general delta cannot use delta for something else '
1242 b'than `prev`: %d<-%d'
1254 b'than `prev`: %d<-%d'
1243 )
1255 )
1244 msg %= (base, target_rev)
1256 msg %= (base, target_rev)
1245 raise error.ProgrammingError(msg)
1257 raise error.ProgrammingError(msg)
1246 deltabase = chainbase
1258 deltabase = chainbase
1247 snapshotdepth = None
1259 snapshotdepth = None
1248 if revlog.delta_config.sparse_revlog and deltabase == nullrev:
1260 if revlog.delta_config.sparse_revlog and deltabase == nullrev:
1249 snapshotdepth = 0
1261 snapshotdepth = 0
1250 elif revlog.delta_config.sparse_revlog and revlog.issnapshot(deltabase):
1262 elif revlog.delta_config.sparse_revlog and revlog.issnapshot(deltabase):
1251 # A delta chain should always be one full snapshot,
1263 # A delta chain should always be one full snapshot,
1252 # zero or more semi-snapshots, and zero or more deltas
1264 # zero or more semi-snapshots, and zero or more deltas
1253 p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
1265 p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
1254 if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
1266 if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
1255 snapshotdepth = len(revlog._deltachain(deltabase)[0])
1267 snapshotdepth = len(revlog._deltachain(deltabase)[0])
1256 delta = None
1268 delta = None
1257 if revinfo.cachedelta:
1269 if revinfo.cachedelta:
1258 cachebase = revinfo.cachedelta[0]
1270 cachebase = revinfo.cachedelta[0]
1259 # check if the diff still apply
1271 # check if the diff still apply
1260 currentbase = cachebase
1272 currentbase = cachebase
1261 while (
1273 while (
1262 currentbase != nullrev
1274 currentbase != nullrev
1263 and currentbase != base
1275 and currentbase != base
1264 and self.revlog.length(currentbase) == 0
1276 and self.revlog.length(currentbase) == 0
1265 ):
1277 ):
1266 currentbase = self.revlog.deltaparent(currentbase)
1278 currentbase = self.revlog.deltaparent(currentbase)
1267 if self.revlog.delta_config.lazy_delta and currentbase == base:
1279 if self.revlog.delta_config.lazy_delta and currentbase == base:
1268 delta = revinfo.cachedelta[1]
1280 delta = revinfo.cachedelta[1]
1269 if delta is None:
1281 if delta is None:
1270 delta = self._builddeltadiff(base, revinfo)
1282 delta = self._builddeltadiff(base, revinfo)
1271 if self._debug_search:
1283 if self._debug_search:
1272 msg = b"DBG-DELTAS-SEARCH: uncompressed-delta-size=%d\n"
1284 msg = b"DBG-DELTAS-SEARCH: uncompressed-delta-size=%d\n"
1273 msg %= len(delta)
1285 msg %= len(delta)
1274 self._write_debug(msg)
1286 self._write_debug(msg)
1275 # snapshotdept need to be neither None nor 0 level snapshot
1287 # snapshotdept need to be neither None nor 0 level snapshot
1276 if revlog.delta_config.upper_bound_comp is not None and snapshotdepth:
1288 if revlog.delta_config.upper_bound_comp is not None and snapshotdepth:
1277 lowestrealisticdeltalen = (
1289 lowestrealisticdeltalen = (
1278 len(delta) // revlog.delta_config.upper_bound_comp
1290 len(delta) // revlog.delta_config.upper_bound_comp
1279 )
1291 )
1280 snapshotlimit = revinfo.textlen >> snapshotdepth
1292 snapshotlimit = revinfo.textlen >> snapshotdepth
1281 if self._debug_search:
1293 if self._debug_search:
1282 msg = b"DBG-DELTAS-SEARCH: projected-lower-size=%d\n"
1294 msg = b"DBG-DELTAS-SEARCH: projected-lower-size=%d\n"
1283 msg %= lowestrealisticdeltalen
1295 msg %= lowestrealisticdeltalen
1284 self._write_debug(msg)
1296 self._write_debug(msg)
1285 if snapshotlimit < lowestrealisticdeltalen:
1297 if snapshotlimit < lowestrealisticdeltalen:
1286 if self._debug_search:
1298 if self._debug_search:
1287 msg = b"DBG-DELTAS-SEARCH: DISCARDED (snapshot limit)\n"
1299 msg = b"DBG-DELTAS-SEARCH: DISCARDED (snapshot limit)\n"
1288 self._write_debug(msg)
1300 self._write_debug(msg)
1289 return None
1301 return None
1290 if revlog.length(base) < lowestrealisticdeltalen:
1302 if revlog.length(base) < lowestrealisticdeltalen:
1291 if self._debug_search:
1303 if self._debug_search:
1292 msg = b"DBG-DELTAS-SEARCH: DISCARDED (prev size)\n"
1304 msg = b"DBG-DELTAS-SEARCH: DISCARDED (prev size)\n"
1293 self._write_debug(msg)
1305 self._write_debug(msg)
1294 return None
1306 return None
1295 header, data = revlog._inner.compress(delta)
1307 header, data = revlog._inner.compress(delta)
1296 deltalen = len(header) + len(data)
1308 deltalen = len(header) + len(data)
1297 offset = revlog.end(len(revlog) - 1)
1309 offset = revlog.end(len(revlog) - 1)
1298 dist = deltalen + offset - revlog.start(chainbase)
1310 dist = deltalen + offset - revlog.start(chainbase)
1299 chainlen, compresseddeltalen = revlog._chaininfo(base)
1311 chainlen, compresseddeltalen = revlog._chaininfo(base)
1300 chainlen += 1
1312 chainlen += 1
1301 compresseddeltalen += deltalen
1313 compresseddeltalen += deltalen
1302
1314
1303 return _deltainfo(
1315 return _deltainfo(
1304 dist,
1316 dist,
1305 deltalen,
1317 deltalen,
1306 (header, data),
1318 (header, data),
1307 deltabase,
1319 deltabase,
1308 chainbase,
1320 chainbase,
1309 chainlen,
1321 chainlen,
1310 compresseddeltalen,
1322 compresseddeltalen,
1311 snapshotdepth,
1323 snapshotdepth,
1312 )
1324 )
1313
1325
1314 def _fullsnapshotinfo(self, revinfo, curr):
1326 def _fullsnapshotinfo(self, revinfo, curr):
1315 rawtext = self.buildtext(revinfo)
1327 rawtext = self.buildtext(revinfo)
1316 data = self.revlog._inner.compress(rawtext)
1328 data = self.revlog._inner.compress(rawtext)
1317 compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
1329 compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
1318 deltabase = chainbase = curr
1330 deltabase = chainbase = curr
1319 snapshotdepth = 0
1331 snapshotdepth = 0
1320 chainlen = 1
1332 chainlen = 1
1321
1333
1322 return _deltainfo(
1334 return _deltainfo(
1323 dist,
1335 dist,
1324 deltalen,
1336 deltalen,
1325 data,
1337 data,
1326 deltabase,
1338 deltabase,
1327 chainbase,
1339 chainbase,
1328 chainlen,
1340 chainlen,
1329 compresseddeltalen,
1341 compresseddeltalen,
1330 snapshotdepth,
1342 snapshotdepth,
1331 )
1343 )
1332
1344
1333 def finddeltainfo(self, revinfo, excluded_bases=None, target_rev=None):
1345 def finddeltainfo(self, revinfo, excluded_bases=None, target_rev=None):
1334 """Find an acceptable delta against a candidate revision
1346 """Find an acceptable delta against a candidate revision
1335
1347
1336 revinfo: information about the revision (instance of _revisioninfo)
1348 revinfo: information about the revision (instance of _revisioninfo)
1337
1349
1338 Returns the first acceptable candidate revision, as ordered by
1350 Returns the first acceptable candidate revision, as ordered by
1339 _candidategroups
1351 _candidategroups
1340
1352
1341 If no suitable deltabase is found, we return delta info for a full
1353 If no suitable deltabase is found, we return delta info for a full
1342 snapshot.
1354 snapshot.
1343
1355
1344 `excluded_bases` is an optional set of revision that cannot be used as
1356 `excluded_bases` is an optional set of revision that cannot be used as
1345 a delta base. Use this to recompute delta suitable in censor or strip
1357 a delta base. Use this to recompute delta suitable in censor or strip
1346 context.
1358 context.
1347 """
1359 """
1348 if target_rev is None:
1360 if target_rev is None:
1349 target_rev = len(self.revlog)
1361 target_rev = len(self.revlog)
1350
1362
1351 gather_debug = self._gather_debug
1363 gather_debug = self._gather_debug
1352 cachedelta = revinfo.cachedelta
1364 cachedelta = revinfo.cachedelta
1353 revlog = self.revlog
1365 revlog = self.revlog
1354 p1r = p2r = None
1366 p1r = p2r = None
1355
1367
1356 if excluded_bases is None:
1368 if excluded_bases is None:
1357 excluded_bases = set()
1369 excluded_bases = set()
1358
1370
1359 if gather_debug:
1371 if gather_debug:
1360 start = util.timer()
1372 start = util.timer()
1361 dbg = self._one_dbg_data()
1373 dbg = self._one_dbg_data()
1362 dbg['revision'] = target_rev
1374 dbg['revision'] = target_rev
1363 p1r = revlog.rev(revinfo.p1)
1375 p1r = revlog.rev(revinfo.p1)
1364 p2r = revlog.rev(revinfo.p2)
1376 p2r = revlog.rev(revinfo.p2)
1365 if p1r != nullrev:
1377 if p1r != nullrev:
1366 p1_chain_len = revlog._chaininfo(p1r)[0]
1378 p1_chain_len = revlog._chaininfo(p1r)[0]
1367 else:
1379 else:
1368 p1_chain_len = -1
1380 p1_chain_len = -1
1369 if p2r != nullrev:
1381 if p2r != nullrev:
1370 p2_chain_len = revlog._chaininfo(p2r)[0]
1382 p2_chain_len = revlog._chaininfo(p2r)[0]
1371 else:
1383 else:
1372 p2_chain_len = -1
1384 p2_chain_len = -1
1373 dbg['p1-chain-len'] = p1_chain_len
1385 dbg['p1-chain-len'] = p1_chain_len
1374 dbg['p2-chain-len'] = p2_chain_len
1386 dbg['p2-chain-len'] = p2_chain_len
1375
1387
1376 # 1) if the revision is empty, no amount of delta can beat it
1388 # 1) if the revision is empty, no amount of delta can beat it
1377 #
1389 #
1378 # 2) no delta for flag processor revision (see "candelta" for why)
1390 # 2) no delta for flag processor revision (see "candelta" for why)
1379 # not calling candelta since only one revision needs test, also to
1391 # not calling candelta since only one revision needs test, also to
1380 # avoid overhead fetching flags again.
1392 # avoid overhead fetching flags again.
1381 if not revinfo.textlen or revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
1393 if not revinfo.textlen or revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
1382 deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
1394 deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
1383 if gather_debug:
1395 if gather_debug:
1384 end = util.timer()
1396 end = util.timer()
1385 dbg['duration'] = end - start
1397 dbg['duration'] = end - start
1386 dbg[
1398 dbg[
1387 'delta-base'
1399 'delta-base'
1388 ] = deltainfo.base # pytype: disable=attribute-error
1400 ] = deltainfo.base # pytype: disable=attribute-error
1389 dbg['search_round_count'] = 0
1401 dbg['search_round_count'] = 0
1390 dbg['using-cached-base'] = False
1402 dbg['using-cached-base'] = False
1391 dbg['delta_try_count'] = 0
1403 dbg['delta_try_count'] = 0
1392 dbg['type'] = b"full"
1404 dbg['type'] = b"full"
1393 dbg['snapshot-depth'] = 0
1405 dbg['snapshot-depth'] = 0
1394 self._dbg_process_data(dbg)
1406 self._dbg_process_data(dbg)
1395 return deltainfo
1407 return deltainfo
1396
1408
1397 deltainfo = None
1409 deltainfo = None
1398
1410
1399 # If this source delta are to be forcibly reuse, let us comply early.
1411 # If this source delta are to be forcibly reuse, let us comply early.
1400 if (
1412 if (
1401 revlog.delta_config.general_delta
1413 revlog.delta_config.general_delta
1402 and revinfo.cachedelta is not None
1414 and revinfo.cachedelta is not None
1403 and revinfo.cachedelta[2] == DELTA_BASE_REUSE_FORCE
1415 and revinfo.cachedelta[2] == DELTA_BASE_REUSE_FORCE
1404 ):
1416 ):
1405 base = revinfo.cachedelta[0]
1417 base = revinfo.cachedelta[0]
1406 if base == nullrev:
1418 if base == nullrev:
1407 dbg_type = b"full"
1419 dbg_type = b"full"
1408 deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
1420 deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
1409 if gather_debug:
1421 if gather_debug:
1410 snapshotdepth = 0
1422 snapshotdepth = 0
1411 elif base not in excluded_bases:
1423 elif base not in excluded_bases:
1412 delta = revinfo.cachedelta[1]
1424 delta = revinfo.cachedelta[1]
1413 header, data = revlog.compress(delta)
1425 header, data = revlog.compress(delta)
1414 deltalen = len(header) + len(data)
1426 deltalen = len(header) + len(data)
1415 if gather_debug:
1427 if gather_debug:
1416 offset = revlog.end(len(revlog) - 1)
1428 offset = revlog.end(len(revlog) - 1)
1417 chainbase = revlog.chainbase(base)
1429 chainbase = revlog.chainbase(base)
1418 distance = deltalen + offset - revlog.start(chainbase)
1430 distance = deltalen + offset - revlog.start(chainbase)
1419 chainlen, compresseddeltalen = revlog._chaininfo(base)
1431 chainlen, compresseddeltalen = revlog._chaininfo(base)
1420 chainlen += 1
1432 chainlen += 1
1421 compresseddeltalen += deltalen
1433 compresseddeltalen += deltalen
1422 if base == p1r or base == p2r:
1434 if base == p1r or base == p2r:
1423 dbg_type = b"delta"
1435 dbg_type = b"delta"
1424 snapshotdepth = None
1436 snapshotdepth = None
1425 elif not revlog.issnapshot(base):
1437 elif not revlog.issnapshot(base):
1426 snapshotdepth = None
1438 snapshotdepth = None
1427 else:
1439 else:
1428 dbg_type = b"snapshot"
1440 dbg_type = b"snapshot"
1429 snapshotdepth = revlog.snapshotdepth(base) + 1
1441 snapshotdepth = revlog.snapshotdepth(base) + 1
1430 else:
1442 else:
1431 distance = None
1443 distance = None
1432 chainbase = None
1444 chainbase = None
1433 chainlen = None
1445 chainlen = None
1434 compresseddeltalen = None
1446 compresseddeltalen = None
1435 snapshotdepth = None
1447 snapshotdepth = None
1436 deltainfo = _deltainfo(
1448 deltainfo = _deltainfo(
1437 distance=distance,
1449 distance=distance,
1438 deltalen=deltalen,
1450 deltalen=deltalen,
1439 data=(header, data),
1451 data=(header, data),
1440 base=base,
1452 base=base,
1441 chainbase=chainbase,
1453 chainbase=chainbase,
1442 chainlen=chainlen,
1454 chainlen=chainlen,
1443 compresseddeltalen=compresseddeltalen,
1455 compresseddeltalen=compresseddeltalen,
1444 snapshotdepth=snapshotdepth,
1456 snapshotdepth=snapshotdepth,
1445 )
1457 )
1446
1458
1447 if deltainfo is not None:
1459 if deltainfo is not None:
1448 if gather_debug:
1460 if gather_debug:
1449 end = util.timer()
1461 end = util.timer()
1450 dbg['duration'] = end - start
1462 dbg['duration'] = end - start
1451 dbg[
1463 dbg[
1452 'delta-base'
1464 'delta-base'
1453 ] = deltainfo.base # pytype: disable=attribute-error
1465 ] = deltainfo.base # pytype: disable=attribute-error
1454 dbg['search_round_count'] = 0
1466 dbg['search_round_count'] = 0
1455 dbg['using-cached-base'] = True
1467 dbg['using-cached-base'] = True
1456 dbg['delta_try_count'] = 0
1468 dbg['delta_try_count'] = 0
1457 dbg['type'] = b"full"
1469 dbg['type'] = b"full"
1458 if snapshotdepth is None:
1470 if snapshotdepth is None:
1459 dbg['snapshot-depth'] = -1
1471 dbg['snapshot-depth'] = -1
1460 else:
1472 else:
1461 dbg['snapshot-depth'] = snapshotdepth
1473 dbg['snapshot-depth'] = snapshotdepth
1462 self._dbg_process_data(dbg)
1474 self._dbg_process_data(dbg)
1463 return deltainfo
1475 return deltainfo
1464
1476
1465 # count the number of different delta we tried (for debug purpose)
1477 # count the number of different delta we tried (for debug purpose)
1466 dbg_try_count = 0
1478 dbg_try_count = 0
1467 # count the number of "search round" we did. (for debug purpose)
1479 # count the number of "search round" we did. (for debug purpose)
1468 dbg_try_rounds = 0
1480 dbg_try_rounds = 0
1469 dbg_type = b'unknown'
1481 dbg_type = b'unknown'
1470
1482
1471 if p1r is None:
1483 if p1r is None:
1472 p1r = revlog.rev(revinfo.p1)
1484 p1r = revlog.rev(revinfo.p1)
1473 p2r = revlog.rev(revinfo.p2)
1485 p2r = revlog.rev(revinfo.p2)
1474
1486
1475 if self._debug_search:
1487 if self._debug_search:
1476 msg = b"DBG-DELTAS-SEARCH: SEARCH rev=%d\n"
1488 msg = b"DBG-DELTAS-SEARCH: SEARCH rev=%d\n"
1477 msg %= target_rev
1489 msg %= target_rev
1478 self._write_debug(msg)
1490 self._write_debug(msg)
1479
1491
1480 search = _DeltaSearch(
1492 search = _DeltaSearch(
1481 self.revlog,
1493 self.revlog,
1482 revinfo,
1494 revinfo,
1483 p1r,
1495 p1r,
1484 p2r,
1496 p2r,
1485 cachedelta,
1497 cachedelta,
1486 excluded_bases,
1498 excluded_bases,
1487 target_rev,
1499 target_rev,
1488 snapshot_cache=self._snapshot_cache,
1500 snapshot_cache=self._snapshot_cache,
1489 )
1501 )
1490
1502
1491 while not search.done:
1503 while not search.done:
1492 current_group = search.current_group
1504 current_group = search.current_group
1493 # current_group can be `None`, but not is search.done is False
1505 # current_group can be `None`, but not is search.done is False
1494 # We add this assert to help pytype
1506 # We add this assert to help pytype
1495 assert current_group is not None
1507 assert current_group is not None
1496 candidaterevs = current_group
1508 candidaterevs = current_group
1497 dbg_try_rounds += 1
1509 dbg_try_rounds += 1
1498 if self._debug_search:
1510 if self._debug_search:
1499 prev = None
1511 prev = None
1500 if deltainfo is not None:
1512 if deltainfo is not None:
1501 prev = deltainfo.base
1513 prev = deltainfo.base
1502
1514
1503 if (
1515 if (
1504 cachedelta is not None
1516 cachedelta is not None
1505 and len(candidaterevs) == 1
1517 and len(candidaterevs) == 1
1506 and cachedelta[0] in candidaterevs
1518 and cachedelta[0] in candidaterevs
1507 ):
1519 ):
1508 round_type = b"cached-delta"
1520 round_type = b"cached-delta"
1509 elif p1r in candidaterevs or p2r in candidaterevs:
1521 elif p1r in candidaterevs or p2r in candidaterevs:
1510 round_type = b"parents"
1522 round_type = b"parents"
1511 elif prev is not None and all(c < prev for c in candidaterevs):
1523 elif prev is not None and all(c < prev for c in candidaterevs):
1512 round_type = b"refine-down"
1524 round_type = b"refine-down"
1513 elif prev is not None and all(c > prev for c in candidaterevs):
1525 elif prev is not None and all(c > prev for c in candidaterevs):
1514 round_type = b"refine-up"
1526 round_type = b"refine-up"
1515 else:
1527 else:
1516 round_type = b"search-down"
1528 round_type = b"search-down"
1517 msg = b"DBG-DELTAS-SEARCH: ROUND #%d - %d candidates - %s\n"
1529 msg = b"DBG-DELTAS-SEARCH: ROUND #%d - %d candidates - %s\n"
1518 msg %= (dbg_try_rounds, len(candidaterevs), round_type)
1530 msg %= (dbg_try_rounds, len(candidaterevs), round_type)
1519 self._write_debug(msg)
1531 self._write_debug(msg)
1520 nominateddeltas = []
1532 nominateddeltas = []
1521 if deltainfo is not None:
1533 if deltainfo is not None:
1522 if self._debug_search:
1534 if self._debug_search:
1523 msg = (
1535 msg = (
1524 b"DBG-DELTAS-SEARCH: CONTENDER: rev=%d - length=%d\n"
1536 b"DBG-DELTAS-SEARCH: CONTENDER: rev=%d - length=%d\n"
1525 )
1537 )
1526 msg %= (deltainfo.base, deltainfo.deltalen)
1538 msg %= (deltainfo.base, deltainfo.deltalen)
1527 self._write_debug(msg)
1539 self._write_debug(msg)
1528 # if we already found a good delta,
1540 # if we already found a good delta,
1529 # challenge it against refined candidates
1541 # challenge it against refined candidates
1530 nominateddeltas.append(deltainfo)
1542 nominateddeltas.append(deltainfo)
1531 for candidaterev in candidaterevs:
1543 for candidaterev in candidaterevs:
1532 if self._debug_search:
1544 if self._debug_search:
1533 msg = b"DBG-DELTAS-SEARCH: CANDIDATE: rev=%d\n"
1545 msg = b"DBG-DELTAS-SEARCH: CANDIDATE: rev=%d\n"
1534 msg %= candidaterev
1546 msg %= candidaterev
1535 self._write_debug(msg)
1547 self._write_debug(msg)
1536 candidate_type = None
1548 candidate_type = None
1537 if candidaterev == p1r:
1549 if candidaterev == p1r:
1538 candidate_type = b"p1"
1550 candidate_type = b"p1"
1539 elif candidaterev == p2r:
1551 elif candidaterev == p2r:
1540 candidate_type = b"p2"
1552 candidate_type = b"p2"
1541 elif self.revlog.issnapshot(candidaterev):
1553 elif self.revlog.issnapshot(candidaterev):
1542 candidate_type = b"snapshot-%d"
1554 candidate_type = b"snapshot-%d"
1543 candidate_type %= self.revlog.snapshotdepth(
1555 candidate_type %= self.revlog.snapshotdepth(
1544 candidaterev
1556 candidaterev
1545 )
1557 )
1546
1558
1547 if candidate_type is not None:
1559 if candidate_type is not None:
1548 msg = b"DBG-DELTAS-SEARCH: type=%s\n"
1560 msg = b"DBG-DELTAS-SEARCH: type=%s\n"
1549 msg %= candidate_type
1561 msg %= candidate_type
1550 self._write_debug(msg)
1562 self._write_debug(msg)
1551 msg = b"DBG-DELTAS-SEARCH: size=%d\n"
1563 msg = b"DBG-DELTAS-SEARCH: size=%d\n"
1552 msg %= self.revlog.length(candidaterev)
1564 msg %= self.revlog.length(candidaterev)
1553 self._write_debug(msg)
1565 self._write_debug(msg)
1554 msg = b"DBG-DELTAS-SEARCH: base=%d\n"
1566 msg = b"DBG-DELTAS-SEARCH: base=%d\n"
1555 msg %= self.revlog.deltaparent(candidaterev)
1567 msg %= self.revlog.deltaparent(candidaterev)
1556 self._write_debug(msg)
1568 self._write_debug(msg)
1557
1569
1558 dbg_try_count += 1
1570 dbg_try_count += 1
1559
1571
1560 if self._debug_search:
1572 if self._debug_search:
1561 delta_start = util.timer()
1573 delta_start = util.timer()
1562 candidatedelta = self._builddeltainfo(
1574 candidatedelta = self._builddeltainfo(
1563 revinfo,
1575 revinfo,
1564 candidaterev,
1576 candidaterev,
1565 target_rev=target_rev,
1577 target_rev=target_rev,
1566 )
1578 )
1567 if self._debug_search:
1579 if self._debug_search:
1568 delta_end = util.timer()
1580 delta_end = util.timer()
1569 msg = b"DBG-DELTAS-SEARCH: delta-search-time=%f\n"
1581 msg = b"DBG-DELTAS-SEARCH: delta-search-time=%f\n"
1570 msg %= delta_end - delta_start
1582 msg %= delta_end - delta_start
1571 self._write_debug(msg)
1583 self._write_debug(msg)
1572 if candidatedelta is not None:
1584 if candidatedelta is not None:
1573 if search.is_good_delta_info(candidatedelta):
1585 if search.is_good_delta_info(candidatedelta):
1574 if self._debug_search:
1586 if self._debug_search:
1575 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (GOOD)\n"
1587 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (GOOD)\n"
1576 msg %= candidatedelta.deltalen
1588 msg %= candidatedelta.deltalen
1577 self._write_debug(msg)
1589 self._write_debug(msg)
1578 nominateddeltas.append(candidatedelta)
1590 nominateddeltas.append(candidatedelta)
1579 elif self._debug_search:
1591 elif self._debug_search:
1580 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (BAD)\n"
1592 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (BAD)\n"
1581 msg %= candidatedelta.deltalen
1593 msg %= candidatedelta.deltalen
1582 self._write_debug(msg)
1594 self._write_debug(msg)
1583 elif self._debug_search:
1595 elif self._debug_search:
1584 msg = b"DBG-DELTAS-SEARCH: NO-DELTA\n"
1596 msg = b"DBG-DELTAS-SEARCH: NO-DELTA\n"
1585 self._write_debug(msg)
1597 self._write_debug(msg)
1586 if nominateddeltas:
1598 if nominateddeltas:
1587 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
1599 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
1588 search.next_group(deltainfo)
1600 search.next_group(deltainfo)
1589
1601
1590 if deltainfo is None:
1602 if deltainfo is None:
1591 dbg_type = b"full"
1603 dbg_type = b"full"
1592 deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
1604 deltainfo = self._fullsnapshotinfo(revinfo, target_rev)
1593 elif deltainfo.snapshotdepth: # pytype: disable=attribute-error
1605 elif deltainfo.snapshotdepth: # pytype: disable=attribute-error
1594 dbg_type = b"snapshot"
1606 dbg_type = b"snapshot"
1595 else:
1607 else:
1596 dbg_type = b"delta"
1608 dbg_type = b"delta"
1597
1609
1598 if gather_debug:
1610 if gather_debug:
1599 end = util.timer()
1611 end = util.timer()
1600 if dbg_type == b'full':
1612 if dbg_type == b'full':
1601 used_cached = (
1613 used_cached = (
1602 cachedelta is not None
1614 cachedelta is not None
1603 and dbg_try_rounds == 0
1615 and dbg_try_rounds == 0
1604 and dbg_try_count == 0
1616 and dbg_try_count == 0
1605 and cachedelta[0] == nullrev
1617 and cachedelta[0] == nullrev
1606 )
1618 )
1607 else:
1619 else:
1608 used_cached = (
1620 used_cached = (
1609 cachedelta is not None
1621 cachedelta is not None
1610 and dbg_try_rounds == 1
1622 and dbg_try_rounds == 1
1611 and dbg_try_count == 1
1623 and dbg_try_count == 1
1612 and deltainfo.base == cachedelta[0]
1624 and deltainfo.base == cachedelta[0]
1613 )
1625 )
1614 dbg['duration'] = end - start
1626 dbg['duration'] = end - start
1615 dbg[
1627 dbg[
1616 'delta-base'
1628 'delta-base'
1617 ] = deltainfo.base # pytype: disable=attribute-error
1629 ] = deltainfo.base # pytype: disable=attribute-error
1618 dbg['search_round_count'] = dbg_try_rounds
1630 dbg['search_round_count'] = dbg_try_rounds
1619 dbg['using-cached-base'] = used_cached
1631 dbg['using-cached-base'] = used_cached
1620 dbg['delta_try_count'] = dbg_try_count
1632 dbg['delta_try_count'] = dbg_try_count
1621 dbg['type'] = dbg_type
1633 dbg['type'] = dbg_type
1622 if (
1634 if (
1623 deltainfo.snapshotdepth # pytype: disable=attribute-error
1635 deltainfo.snapshotdepth # pytype: disable=attribute-error
1624 is not None
1636 is not None
1625 ):
1637 ):
1626 dbg[
1638 dbg[
1627 'snapshot-depth'
1639 'snapshot-depth'
1628 ] = deltainfo.snapshotdepth # pytype: disable=attribute-error
1640 ] = deltainfo.snapshotdepth # pytype: disable=attribute-error
1629 else:
1641 else:
1630 dbg['snapshot-depth'] = -1
1642 dbg['snapshot-depth'] = -1
1631 self._dbg_process_data(dbg)
1643 self._dbg_process_data(dbg)
1632 return deltainfo
1644 return deltainfo
1633
1645
1634 def _one_dbg_data(self):
1646 def _one_dbg_data(self):
1635 dbg = {
1647 dbg = {
1636 'duration': None,
1648 'duration': None,
1637 'revision': None,
1649 'revision': None,
1638 'delta-base': None,
1650 'delta-base': None,
1639 'search_round_count': None,
1651 'search_round_count': None,
1640 'using-cached-base': None,
1652 'using-cached-base': None,
1641 'delta_try_count': None,
1653 'delta_try_count': None,
1642 'type': None,
1654 'type': None,
1643 'p1-chain-len': None,
1655 'p1-chain-len': None,
1644 'p2-chain-len': None,
1656 'p2-chain-len': None,
1645 'snapshot-depth': None,
1657 'snapshot-depth': None,
1646 'target-revlog': None,
1658 'target-revlog': None,
1647 }
1659 }
1648 target_revlog = b"UNKNOWN"
1660 target_revlog = b"UNKNOWN"
1649 target_type = self.revlog.target[0]
1661 target_type = self.revlog.target[0]
1650 target_key = self.revlog.target[1]
1662 target_key = self.revlog.target[1]
1651 if target_type == KIND_CHANGELOG:
1663 if target_type == KIND_CHANGELOG:
1652 target_revlog = b'CHANGELOG:'
1664 target_revlog = b'CHANGELOG:'
1653 elif target_type == KIND_MANIFESTLOG:
1665 elif target_type == KIND_MANIFESTLOG:
1654 target_revlog = b'MANIFESTLOG:'
1666 target_revlog = b'MANIFESTLOG:'
1655 if target_key:
1667 if target_key:
1656 target_revlog += b'%s:' % target_key
1668 target_revlog += b'%s:' % target_key
1657 elif target_type == KIND_FILELOG:
1669 elif target_type == KIND_FILELOG:
1658 target_revlog = b'FILELOG:'
1670 target_revlog = b'FILELOG:'
1659 if target_key:
1671 if target_key:
1660 target_revlog += b'%s:' % target_key
1672 target_revlog += b'%s:' % target_key
1661 dbg['target-revlog'] = target_revlog
1673 dbg['target-revlog'] = target_revlog
1662 return dbg
1674 return dbg
1663
1675
1664 def _dbg_process_data(self, dbg):
1676 def _dbg_process_data(self, dbg):
1665 if self._debug_info is not None:
1677 if self._debug_info is not None:
1666 self._debug_info.append(dbg)
1678 self._debug_info.append(dbg)
1667
1679
1668 if self._write_debug is not None:
1680 if self._write_debug is not None:
1669 msg = (
1681 msg = (
1670 b"DBG-DELTAS:"
1682 b"DBG-DELTAS:"
1671 b" %-12s"
1683 b" %-12s"
1672 b" rev=%d:"
1684 b" rev=%d:"
1673 b" delta-base=%d"
1685 b" delta-base=%d"
1674 b" is-cached=%d"
1686 b" is-cached=%d"
1675 b" - search-rounds=%d"
1687 b" - search-rounds=%d"
1676 b" try-count=%d"
1688 b" try-count=%d"
1677 b" - delta-type=%-6s"
1689 b" - delta-type=%-6s"
1678 b" snap-depth=%d"
1690 b" snap-depth=%d"
1679 b" - p1-chain-length=%d"
1691 b" - p1-chain-length=%d"
1680 b" p2-chain-length=%d"
1692 b" p2-chain-length=%d"
1681 b" - duration=%f"
1693 b" - duration=%f"
1682 b"\n"
1694 b"\n"
1683 )
1695 )
1684 msg %= (
1696 msg %= (
1685 dbg["target-revlog"],
1697 dbg["target-revlog"],
1686 dbg["revision"],
1698 dbg["revision"],
1687 dbg["delta-base"],
1699 dbg["delta-base"],
1688 dbg["using-cached-base"],
1700 dbg["using-cached-base"],
1689 dbg["search_round_count"],
1701 dbg["search_round_count"],
1690 dbg["delta_try_count"],
1702 dbg["delta_try_count"],
1691 dbg["type"],
1703 dbg["type"],
1692 dbg["snapshot-depth"],
1704 dbg["snapshot-depth"],
1693 dbg["p1-chain-len"],
1705 dbg["p1-chain-len"],
1694 dbg["p2-chain-len"],
1706 dbg["p2-chain-len"],
1695 dbg["duration"],
1707 dbg["duration"],
1696 )
1708 )
1697 self._write_debug(msg)
1709 self._write_debug(msg)
1698
1710
1699
1711
1700 def delta_compression(default_compression_header, deltainfo):
1712 def delta_compression(default_compression_header, deltainfo):
1701 """return (COMPRESSION_MODE, deltainfo)
1713 """return (COMPRESSION_MODE, deltainfo)
1702
1714
1703 used by revlog v2+ format to dispatch between PLAIN and DEFAULT
1715 used by revlog v2+ format to dispatch between PLAIN and DEFAULT
1704 compression.
1716 compression.
1705 """
1717 """
1706 h, d = deltainfo.data
1718 h, d = deltainfo.data
1707 compression_mode = COMP_MODE_INLINE
1719 compression_mode = COMP_MODE_INLINE
1708 if not h and not d:
1720 if not h and not d:
1709 # not data to store at all... declare them uncompressed
1721 # not data to store at all... declare them uncompressed
1710 compression_mode = COMP_MODE_PLAIN
1722 compression_mode = COMP_MODE_PLAIN
1711 elif not h:
1723 elif not h:
1712 t = d[0:1]
1724 t = d[0:1]
1713 if t == b'\0':
1725 if t == b'\0':
1714 compression_mode = COMP_MODE_PLAIN
1726 compression_mode = COMP_MODE_PLAIN
1715 elif t == default_compression_header:
1727 elif t == default_compression_header:
1716 compression_mode = COMP_MODE_DEFAULT
1728 compression_mode = COMP_MODE_DEFAULT
1717 elif h == b'u':
1729 elif h == b'u':
1718 # we have a more efficient way to declare uncompressed
1730 # we have a more efficient way to declare uncompressed
1719 h = b''
1731 h = b''
1720 compression_mode = COMP_MODE_PLAIN
1732 compression_mode = COMP_MODE_PLAIN
1721 deltainfo = drop_u_compression(deltainfo)
1733 deltainfo = drop_u_compression(deltainfo)
1722 return compression_mode, deltainfo
1734 return compression_mode, deltainfo
General Comments 0
You need to be logged in to leave comments. Login now