##// END OF EJS Templates
delta-find: expand a function definition and call before extending it...
marmoute -
r50507:2afee217 default
parent child Browse files
Show More
@@ -1,1386 +1,1396
1 # revlogdeltas.py - Logic around delta computation for revlog
1 # revlogdeltas.py - Logic around delta computation for revlog
2 #
2 #
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 # Copyright 2018 Octobus <contact@octobus.net>
4 # Copyright 2018 Octobus <contact@octobus.net>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8 """Helper class to compute deltas stored inside revlogs"""
8 """Helper class to compute deltas stored inside revlogs"""
9
9
10
10
11 import collections
11 import collections
12 import struct
12 import struct
13
13
14 # import stuff from node for others to import from revlog
14 # import stuff from node for others to import from revlog
15 from ..node import nullrev
15 from ..node import nullrev
16 from ..i18n import _
16 from ..i18n import _
17 from ..pycompat import getattr
17 from ..pycompat import getattr
18
18
19 from .constants import (
19 from .constants import (
20 COMP_MODE_DEFAULT,
20 COMP_MODE_DEFAULT,
21 COMP_MODE_INLINE,
21 COMP_MODE_INLINE,
22 COMP_MODE_PLAIN,
22 COMP_MODE_PLAIN,
23 KIND_CHANGELOG,
23 KIND_CHANGELOG,
24 KIND_FILELOG,
24 KIND_FILELOG,
25 KIND_MANIFESTLOG,
25 KIND_MANIFESTLOG,
26 REVIDX_ISCENSORED,
26 REVIDX_ISCENSORED,
27 REVIDX_RAWTEXT_CHANGING_FLAGS,
27 REVIDX_RAWTEXT_CHANGING_FLAGS,
28 )
28 )
29
29
30 from ..thirdparty import attr
30 from ..thirdparty import attr
31
31
32 from .. import (
32 from .. import (
33 error,
33 error,
34 mdiff,
34 mdiff,
35 util,
35 util,
36 )
36 )
37
37
38 from . import flagutil
38 from . import flagutil
39
39
40 # maximum <delta-chain-data>/<revision-text-length> ratio
40 # maximum <delta-chain-data>/<revision-text-length> ratio
41 LIMIT_DELTA2TEXT = 2
41 LIMIT_DELTA2TEXT = 2
42
42
43
43
44 class _testrevlog:
44 class _testrevlog:
45 """minimalist fake revlog to use in doctests"""
45 """minimalist fake revlog to use in doctests"""
46
46
47 def __init__(self, data, density=0.5, mingap=0, snapshot=()):
47 def __init__(self, data, density=0.5, mingap=0, snapshot=()):
48 """data is an list of revision payload boundaries"""
48 """data is an list of revision payload boundaries"""
49 self._data = data
49 self._data = data
50 self._srdensitythreshold = density
50 self._srdensitythreshold = density
51 self._srmingapsize = mingap
51 self._srmingapsize = mingap
52 self._snapshot = set(snapshot)
52 self._snapshot = set(snapshot)
53 self.index = None
53 self.index = None
54
54
55 def start(self, rev):
55 def start(self, rev):
56 if rev == nullrev:
56 if rev == nullrev:
57 return 0
57 return 0
58 if rev == 0:
58 if rev == 0:
59 return 0
59 return 0
60 return self._data[rev - 1]
60 return self._data[rev - 1]
61
61
62 def end(self, rev):
62 def end(self, rev):
63 if rev == nullrev:
63 if rev == nullrev:
64 return 0
64 return 0
65 return self._data[rev]
65 return self._data[rev]
66
66
67 def length(self, rev):
67 def length(self, rev):
68 return self.end(rev) - self.start(rev)
68 return self.end(rev) - self.start(rev)
69
69
70 def __len__(self):
70 def __len__(self):
71 return len(self._data)
71 return len(self._data)
72
72
73 def issnapshot(self, rev):
73 def issnapshot(self, rev):
74 if rev == nullrev:
74 if rev == nullrev:
75 return True
75 return True
76 return rev in self._snapshot
76 return rev in self._snapshot
77
77
78
78
def slicechunk(revlog, revs, targetsize=None):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `revlog._srdensitythreshold`. No gap smaller than
    `revlog._srmingapsize` is skipped.

    If `targetsize` is set, no chunk larger than `targetsize` will be yield.
    For consistency with other slicing choice, this limit won't go lower than
    `revlog._srmingapsize`.

    If individual revisions chunk are larger than this limit, they will still
    be raised individually.

    >>> data = [
    ...  5,  #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ]
    >>> revlog = _testrevlog(data, snapshot=range(16))

    >>> list(slicechunk(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(slicechunk(revlog, [0, 15]))
    [[0], [15]]
    >>> list(slicechunk(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]

    Slicing with a maximum chunk size
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
    [[0], [11], [13], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
    [[0], [11], [13, 15]]

    Slicing involving nullrev
    >>> list(slicechunk(revlog, [-1, 0, 11, 13, 15], targetsize=20))
    [[-1, 0], [11], [13, 15]]
    >>> list(slicechunk(revlog, [-1, 13, 15], targetsize=5))
    [[-1], [13], [15]]
    """
    if targetsize is not None:
        # never slice below the minimal gap size configured on the revlog
        targetsize = max(targetsize, revlog._srmingapsize)
    # targetsize should not be specified when evaluating delta candidates:
    # * targetsize is used to ensure we stay within specification when reading,
    density_slicer = getattr(revlog.index, 'slicechunktodensity', None)
    if density_slicer is None:
        # the index has no native implementation; use the Python fallback
        def density_slicer(x, y, z):
            return _slicechunktodensity(revlog, x, y, z)

    threshold = revlog._srdensitythreshold
    mingap = revlog._srmingapsize
    for chunk in density_slicer(revs, threshold, mingap):
        # a density-acceptable chunk may still be too large to read at once
        yield from _slicechunktosize(revlog, chunk, targetsize)
151
151
152
152
def _slicechunktosize(revlog, revs, targetsize=None):
    """slice revs to match the target size

    This is intended to be used on chunk that density slicing selected by that
    are still too large compared to the read garantee of revlog. This might
    happens when "minimal gap size" interrupted the slicing or when chain are
    built in a way that create large blocks next to each other.

    >>> data = [
    ...  3,  #0 (3)
    ...  5,  #1 (2)
    ...  6,  #2 (1)
    ...  8,  #3 (2)
    ...  8,  #4 (empty)
    ...  11, #5 (3)
    ...  12, #6 (1)
    ...  13, #7 (1)
    ...  14, #8 (1)
    ... ]

    == All snapshots cases ==
    >>> revlog = _testrevlog(data, snapshot=range(9))

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1, 2], [3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]

    == No Snapshot cases ==
    >>> revlog = _testrevlog(data)

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1], [2, 3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [4, 5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]

    == mixed case ==
    >>> revlog = _testrevlog(data, snapshot=[0, 1, 2])
    >>> list(_slicechunktosize(revlog, list(range(9)), 5))
    [[0, 1], [2], [3, 4, 5], [6, 7, 8]]
    """
    assert targetsize is None or 0 <= targetsize
    startdata = revlog.start(revs[0])
    enddata = revlog.end(revs[-1])
    fullspan = enddata - startdata
    # the whole chunk already fits within the target size: nothing to do
    if targetsize is None or fullspan <= targetsize:
        yield revs
        return

    startrevidx = 0
    endrevidx = 1
    iterrevs = enumerate(revs)
    next(iterrevs)  # skip first rev.
    # first step: get snapshots out of the way
    # Snapshots at the head of the chunk are grouped greedily as long as the
    # accumulated span stays within `targetsize`; the first non-snapshot
    # revision ends this pass and hands over to the binary slicing below.
    for idx, r in iterrevs:
        span = revlog.end(r) - startdata
        snapshot = revlog.issnapshot(r)
        if span <= targetsize and snapshot:
            # still within budget: extend the current chunk
            endrevidx = idx + 1
        else:
            # budget exceeded (or non-snapshot reached): flush current chunk
            chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
            if chunk:
                yield chunk
            startrevidx = idx
            startdata = revlog.start(r)
            endrevidx = idx + 1
        if not snapshot:
            break

    # for the others, we use binary slicing to quickly converge toward valid
    # chunks (otherwise, we might end up looking for start/end of many
    # revisions). This logic is not looking for the perfect slicing point, it
    # focuses on quickly converging toward valid chunks.
    nbitem = len(revs)
    while (enddata - startdata) > targetsize:
        endrevidx = nbitem
        if nbitem - startrevidx <= 1:
            break  # protect against individual chunk larger than limit
        localenddata = revlog.end(revs[endrevidx - 1])
        span = localenddata - startdata
        # halve the candidate range until its on-disk span fits the target
        while span > targetsize:
            if endrevidx - startrevidx <= 1:
                break  # protect against individual chunk larger than limit
            endrevidx -= (endrevidx - startrevidx) // 2
            localenddata = revlog.end(revs[endrevidx - 1])
            span = localenddata - startdata
        chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
        if chunk:
            yield chunk
        startrevidx = endrevidx
        startdata = revlog.start(revs[startrevidx])

    # emit whatever remains after the loop above
    chunk = _trimchunk(revlog, revs, startrevidx)
    if chunk:
        yield chunk
306
306
307
307
def _slicechunktodensity(revlog, revs, targetdensity=0.5, mingapsize=0):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
    skipped.

    >>> revlog = _testrevlog([
    ...  5,  #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ])

    >>> list(_slicechunktodensity(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(_slicechunktodensity(revlog, [0, 15]))
    [[0], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           mingapsize=20))
    [[1, 2, 3, 5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95))
    [[1, 2], [5], [8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95, mingapsize=12))
    [[1, 2], [5, 8, 10, 11], [14]]
    """
    start = revlog.start
    length = revlog.length

    # a single revision (or nothing) cannot be sliced any further
    if len(revs) <= 1:
        yield revs
        return

    deltachainspan = segmentspan(revlog, revs)

    # the whole span is smaller than the minimal gap we would skip anyway
    if deltachainspan < mingapsize:
        yield revs
        return

    readdata = deltachainspan
    chainpayload = sum(length(r) for r in revs)

    if deltachainspan:
        density = chainpayload / float(deltachainspan)
    else:
        density = 1.0

    # already dense enough: no slicing needed
    if density >= targetdensity:
        yield revs
        return

    # Collect the gaps between consecutive non-empty revisions; the list is
    # sorted afterwards so the largest gaps can be popped first.
    gaps = []
    prevend = None
    for i, rev in enumerate(revs):
        revstart = start(rev)
        revlen = length(rev)

        # Skip empty revisions to form larger holes
        if revlen == 0:
            continue

        if prevend is not None:
            gapsize = revstart - prevend
            # only consider holes that are large enough
            if gapsize > mingapsize:
                gaps.append((gapsize, i))

        prevend = revstart + revlen
    # sort the gaps to pop them from largest to small
    gaps.sort()

    # Collect the indices of the largest holes until the density is acceptable
    selected = []
    while gaps and density < targetdensity:
        gapsize, gapidx = gaps.pop()

        selected.append(gapidx)

        # skipping this gap shrinks the amount of data actually read, which
        # raises the effective density of what remains
        readdata -= gapsize
        if readdata > 0:
            density = chainpayload / float(readdata)
        else:
            density = 1.0
    selected.sort()

    # Cut the revs at collected indices
    previdx = 0
    for idx in selected:

        chunk = _trimchunk(revlog, revs, previdx, idx)
        if chunk:
            yield chunk

        previdx = idx

    # emit the final chunk after the last cut point
    chunk = _trimchunk(revlog, revs, previdx)
    if chunk:
        yield chunk
432
432
433
433
434 def _trimchunk(revlog, revs, startidx, endidx=None):
434 def _trimchunk(revlog, revs, startidx, endidx=None):
435 """returns revs[startidx:endidx] without empty trailing revs
435 """returns revs[startidx:endidx] without empty trailing revs
436
436
437 Doctest Setup
437 Doctest Setup
438 >>> revlog = _testrevlog([
438 >>> revlog = _testrevlog([
439 ... 5, #0
439 ... 5, #0
440 ... 10, #1
440 ... 10, #1
441 ... 12, #2
441 ... 12, #2
442 ... 12, #3 (empty)
442 ... 12, #3 (empty)
443 ... 17, #4
443 ... 17, #4
444 ... 21, #5
444 ... 21, #5
445 ... 21, #6 (empty)
445 ... 21, #6 (empty)
446 ... ])
446 ... ])
447
447
448 Contiguous cases:
448 Contiguous cases:
449 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
449 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
450 [0, 1, 2, 3, 4, 5]
450 [0, 1, 2, 3, 4, 5]
451 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
451 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
452 [0, 1, 2, 3, 4]
452 [0, 1, 2, 3, 4]
453 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
453 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
454 [0, 1, 2]
454 [0, 1, 2]
455 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
455 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
456 [2]
456 [2]
457 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
457 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
458 [3, 4, 5]
458 [3, 4, 5]
459 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
459 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
460 [3, 4]
460 [3, 4]
461
461
462 Discontiguous cases:
462 Discontiguous cases:
463 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
463 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
464 [1, 3, 5]
464 [1, 3, 5]
465 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
465 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
466 [1]
466 [1]
467 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
467 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
468 [3, 5]
468 [3, 5]
469 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
469 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
470 [3, 5]
470 [3, 5]
471 """
471 """
472 length = revlog.length
472 length = revlog.length
473
473
474 if endidx is None:
474 if endidx is None:
475 endidx = len(revs)
475 endidx = len(revs)
476
476
477 # If we have a non-emtpy delta candidate, there are nothing to trim
477 # If we have a non-emtpy delta candidate, there are nothing to trim
478 if revs[endidx - 1] < len(revlog):
478 if revs[endidx - 1] < len(revlog):
479 # Trim empty revs at the end, except the very first revision of a chain
479 # Trim empty revs at the end, except the very first revision of a chain
480 while (
480 while (
481 endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0
481 endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0
482 ):
482 ):
483 endidx -= 1
483 endidx -= 1
484
484
485 return revs[startidx:endidx]
485 return revs[startidx:endidx]
486
486
487
487
def segmentspan(revlog, revs):
    """Get the byte span of a segment of revisions

    revs is a sorted array of revision numbers

    >>> revlog = _testrevlog([
    ...  5,  #0
    ...  10, #1
    ...  12, #2
    ...  12, #3 (empty)
    ...  17, #4
    ... ])

    >>> segmentspan(revlog, [0, 1, 2, 3, 4])
    17
    >>> segmentspan(revlog, [0, 4])
    17
    >>> segmentspan(revlog, [3, 4])
    5
    >>> segmentspan(revlog, [1, 2, 3,])
    7
    >>> segmentspan(revlog, [1, 3])
    7
    """
    # an empty segment spans no bytes at all
    if not revs:
        return 0
    # from the start of the first revision to the end of the last one
    return revlog.end(revs[-1]) - revlog.start(revs[0])
516
516
517
517
def _textfromdelta(fh, revlog, baserev, delta, p1, p2, flags, expectednode):
    """build full text from a (base, delta) pair and other metadata

    fh:           file handle handed down to ``revlog.revision`` when the
                  base text has to be read
    baserev:      revision number the delta applies against
    delta:        binary delta, possibly a full-replacement delta
    p1, p2:       parent nodes, used for hash validation
    flags:        revision index flags (e.g. REVIDX_ISCENSORED)
    expectednode: node id the reconstructed text is expected to hash to

    Returns the reconstructed full text. A CensoredNodeError raised during
    validation is swallowed only when the censored index flag is set;
    conversely a StorageError is raised when the flag is set but validation
    did not detect censorship.
    """
    # special case deltas which replace entire base; no need to decode
    # base revision. this neatly avoids censored bases, which throw when
    # they're decoded.
    hlen = struct.calcsize(b">lll")
    if delta[:hlen] == mdiff.replacediffheader(
        revlog.rawsize(baserev), len(delta) - hlen
    ):
        # full replacement: the text is simply everything after the header
        fulltext = delta[hlen:]
    else:
        # deltabase is rawtext before changed by flag processors, which is
        # equivalent to non-raw text
        basetext = revlog.revision(baserev, _df=fh)
        fulltext = mdiff.patch(basetext, delta)

    try:
        validatehash = flagutil.processflagsraw(revlog, fulltext, flags)
        if validatehash:
            revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
        if flags & REVIDX_ISCENSORED:
            # the flag claims censorship but validation went through cleanly
            raise error.StorageError(
                _(b'node %s is not censored') % expectednode
            )
    except error.CensoredNodeError:
        # must pass the censored index flag to add censored revisions
        if not flags & REVIDX_ISCENSORED:
            raise
    return fulltext
547
547
548
548
@attr.s(slots=True, frozen=True)
class _deltainfo:
    """Immutable record describing a candidate delta and its chain."""

    # distance from the base revision -- bounds the amount of I/O needed
    # to read the chain (see isgooddeltainfo)
    distance = attr.ib()
    # length of the (possibly compressed) delta payload, header included
    deltalen = attr.ib()
    # (header, payload) pair; header is b'u' for uncompressed data
    # (see drop_u_compression) or b'' when no header is needed
    data = attr.ib()
    # revision number the delta applies against
    base = attr.ib()
    # first revision of the delta chain
    chainbase = attr.ib()
    # number of revisions in the delta chain
    chainlen = attr.ib()
    # sum of the sizes of all deltas that must be applied to rebuild the
    # text -- bounds the amount of CPU consumed (see isgooddeltainfo)
    compresseddeltalen = attr.ib()
    # depth of the snapshot when this delta is an intermediate snapshot,
    # None otherwise
    snapshotdepth = attr.ib()
559
559
560
560
def drop_u_compression(delta):
    """turn into a "u" (no-compression) into no-compression without header

    This is useful for revlog format that has better compression method.
    """
    header, payload = delta.data
    assert header == b'u', header
    # dropping the one-byte b'u' marker shortens the stored delta by one;
    # everything else about the delta and its chain is unchanged
    return _deltainfo(
        distance=delta.distance,
        deltalen=delta.deltalen - 1,
        data=(b'', payload),
        base=delta.base,
        chainbase=delta.chainbase,
        chainlen=delta.chainlen,
        compresseddeltalen=delta.compresseddeltalen,
        snapshotdepth=delta.snapshotdepth,
    )
577
577
578
578
def isgooddeltainfo(revlog, deltainfo, revinfo):
    """Returns True if the given delta is good. Good means that it is within
    the disk span, disk size, and chain length bounds that we know to be
    performant."""
    # no candidate at all can never be good
    if deltainfo is None:
        return False

    # - 'deltainfo.distance' is the distance from the base revision --
    #   bounding it limits the amount of I/O we need to do.
    # - 'deltainfo.compresseddeltalen' is the sum of the total size of
    #   deltas we need to apply -- bounding it limits the amount of CPU
    #   we consume.
    fulltextlen = revinfo.textlen
    defaultmax = fulltextlen * 4
    configured_span = revlog._maxdeltachainspan
    if configured_span:
        maxdist = max(configured_span, defaultmax)
    else:
        # no configured limit: make sure the distance check always passes
        maxdist = max(deltainfo.distance, defaultmax)

    # Bad delta from read span:
    #
    # If the span of data read is larger than the maximum allowed.
    #
    # In the sparse-revlog case, we rely on the associated "sparse reading"
    # to avoid issue related to the span of data. In theory, it would be
    # possible to build pathological revlog where delta pattern would lead
    # to too many reads. However, they do not happen in practice at all. So
    # we skip the span check entirely.
    if deltainfo.distance > maxdist and not revlog._sparserevlog:
        return False

    # Bad delta from new delta size:
    #
    # If the delta size is larger than the target text, storing the
    # delta will be inefficient.
    if deltainfo.deltalen > fulltextlen:
        return False

    # Bad delta from cumulated payload size:
    #
    # If the sum of delta get larger than K * target text length.
    if deltainfo.compresseddeltalen > fulltextlen * LIMIT_DELTA2TEXT:
        return False

    # Bad delta from chain length:
    #
    # If the number of delta in the chain gets too high.
    maxchainlen = revlog._maxchainlen
    if maxchainlen and deltainfo.chainlen > maxchainlen:
        return False

    depth = deltainfo.snapshotdepth
    if depth is not None:
        # bad delta from intermediate snapshot size limit
        #
        # If an intermediate snapshot size is higher than the limit. The
        # limit exist to prevent endless chain of intermediate delta to be
        # created.
        if deltainfo.deltalen > (fulltextlen >> depth):
            return False
    if depth:
        # bad delta if new intermediate snapshot is larger than the previous
        # snapshot
        if deltainfo.deltalen > revlog.length(deltainfo.base):
            return False

    return True
650
650
651
651
# If a revision's full text is that much bigger than a base candidate full
# text's, it is very unlikely that it will produce a valid delta. We no longer
# consider these candidates.
LIMIT_BASE2TEXT = 500
656
656
657
657
def _candidategroups(
    revlog,
    textlen,
    p1,
    p2,
    cachedelta,
):
    """Provides group of revision to be tested as delta base

    This top level function focus on emitting groups with unique and worthwhile
    content. See _rawgroups for details about the group order.

    This is a generator-coroutine: each yielded value is a tuple of candidate
    base revisions (a final None signals exhaustion). The caller answers
    through ``send()`` with the revision it retained from the previous group
    (or None), which is forwarded to the refining logic.

    revlog:     the revlog the new revision will be added to
    textlen:    length of the incoming fulltext
    p1, p2:     parent revisions of the incoming revision
    cachedelta: optional (baserev, delta) pair received from a bundle or a
                source revlog
    """
    # should we try to build a delta?
    if not (len(revlog) and revlog._storedeltachains):
        yield None
        return

    deltalength = revlog.length
    deltaparent = revlog.deltaparent
    sparse = revlog._sparserevlog
    good = None

    deltas_limit = textlen * LIMIT_DELTA2TEXT

    tested = {nullrev}
    candidates = _refinedgroups(
        revlog,
        p1,
        p2,
        cachedelta,
    )
    while True:
        temptative = candidates.send(good)
        if temptative is None:
            break
        group = []
        for rev in temptative:
            # skip over empty delta (no need to include them in a chain)
            while revlog._generaldelta and not (
                rev == nullrev or rev in tested or deltalength(rev)
            ):
                tested.add(rev)
                rev = deltaparent(rev)
            # no need to try a delta against nullrev, this will be done as a
            # last resort.
            if rev == nullrev:
                continue
            # filter out revision we tested already
            if rev in tested:
                continue
            tested.add(rev)
            # filter out delta base that will never produce good delta
            if deltas_limit < revlog.length(rev):
                continue
            if sparse and revlog.rawsize(rev) < (textlen // LIMIT_BASE2TEXT):
                continue
            # no delta for rawtext-changing revs (see "candelta" for why)
            if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
                continue

            # If we reach here, we are about to build and test a delta.
            # The delta building process will compute the chaininfo in all
            # case, since that computation is cached, it is fine to access it
            # here too.
            chainlen, chainsize = revlog._chaininfo(rev)
            # if chain will be too long, skip base
            if revlog._maxchainlen and chainlen >= revlog._maxchainlen:
                continue
            # if chain already have too much data, skip base
            if deltas_limit < chainsize:
                continue
            if sparse and revlog.upperboundcomp is not None:
                maxcomp = revlog.upperboundcomp
                basenotsnap = (p1, p2, nullrev)
                if rev not in basenotsnap and revlog.issnapshot(rev):
                    snapshotdepth = revlog.snapshotdepth(rev)
                    # If text is significantly larger than the base, we can
                    # expect the resulting delta to be proportional to the size
                    # difference
                    revsize = revlog.rawsize(rev)
                    rawsizedistance = max(textlen - revsize, 0)
                    # use an estimate of the compression upper bound.
                    lowestrealisticdeltalen = rawsizedistance // maxcomp

                    # check the absolute constraint on the delta size
                    snapshotlimit = textlen >> snapshotdepth
                    if snapshotlimit < lowestrealisticdeltalen:
                        # delta lower bound is larger than accepted upper bound
                        continue

                    # check the relative constraint on the delta size
                    revlength = revlog.length(rev)
                    if revlength < lowestrealisticdeltalen:
                        # delta probable lower bound is larger than target base
                        continue

            group.append(rev)
        if group:
            # XXX: in the sparse revlog case, group can become large,
            # impacting performances. Some bounding or slicing mechanism
            # would help to reduce this impact.
            good = yield tuple(group)
    yield None
755
761
756
762
def _findsnapshots(revlog, cache, start_rev):
    """find snapshot from start_rev to tip

    ``cache`` maps a delta-parent revision to the list of snapshot revisions
    based on it; it is updated in place.
    """
    index = revlog.index
    if util.safehasattr(index, b'findsnapshots'):
        # fast path: the index implementation does the scan natively
        index.findsnapshots(cache, start_rev)
        return
    # slow path: walk the revisions one by one in Python
    is_snapshot = revlog.issnapshot
    parent_of = revlog.deltaparent
    for rev in revlog.revs(start_rev):
        if is_snapshot(rev):
            cache[parent_of(rev)].append(rev)
767
773
768
774
def _refinedgroups(revlog, p1, p2, cachedelta):
    """Generator-coroutine yielding groups of candidate delta bases.

    First tries the delta base from ``cachedelta`` (when delta reuse is
    enabled), then the groups produced by ``_rawgroups``. On sparse revlogs,
    once the caller reports a good snapshot base (via ``send()``), that
    choice is refined by walking the snapshot chain down (towards its delta
    parents) and then up (towards the snapshots based on it).

    A final None is always yielded to signal exhaustion.
    """
    good = None
    # First we try to reuse the delta contained in the bundle.
    # (or from the source revlog)
    #
    # This logic only applies to general delta repositories and can be disabled
    # through configuration. Disabling reuse source delta is useful when
    # we want to make sure we recomputed "optimal" deltas.
    debug_info = None
    if cachedelta and revlog._generaldelta and revlog._lazydeltabase:
        # Assume what we received from the server is a good choice
        # build delta will reuse the cache
        if debug_info is not None:
            debug_info['cached-delta.tested'] += 1
        good = yield (cachedelta[0],)
        if good is not None:
            if debug_info is not None:
                debug_info['cached-delta.accepted'] += 1
            yield None
            return
    # XXX cache me higher
    snapshots = collections.defaultdict(list)
    groups = _rawgroups(
        revlog,
        p1,
        p2,
        cachedelta,
        snapshots,
    )
    for candidates in groups:
        good = yield candidates
        if good is not None:
            break

    # If sparse revlog is enabled, we can try to refine the available deltas
    if not revlog._sparserevlog:
        yield None
        return

    # if we have a refinable value, try to refine it
    if good is not None and good not in (p1, p2) and revlog.issnapshot(good):
        # refine snapshot down
        previous = None
        while previous != good:
            previous = good
            base = revlog.deltaparent(good)
            if base == nullrev:
                break
            good = yield (base,)
        # refine snapshot up
        if not snapshots:
            _findsnapshots(revlog, snapshots, good + 1)
        previous = None
        while good != previous:
            previous = good
            children = tuple(sorted(c for c in snapshots[good]))
            good = yield children

    if debug_info is not None:
        if good is None:
            debug_info['no-solution'] += 1

    yield None
832
838
833
839
def _rawgroups(revlog, p1, p2, cachedelta, snapshots=None):
    """Provides group of revision to be tested as delta base

    This lower level function focus on emitting deltas that are theoretically
    interesting, without looking at any practical details.

    The group order aims at providing fast or small candidates first.

    ``snapshots`` is an optional, caller-provided mapping of delta-parent
    revision to the snapshots based on it; it is filled in as a side effect
    (via _findsnapshots) so the caller can reuse it for refinement.
    """
    gdelta = revlog._generaldelta
    # gate sparse behind general-delta because of issue6056
    sparse = gdelta and revlog._sparserevlog
    curr = len(revlog)
    prev = curr - 1
    deltachain = lambda rev: revlog._deltachain(rev)[0]

    if gdelta:
        # exclude already lazy tested base if any
        parents = [p for p in (p1, p2) if p != nullrev]

        if not revlog._deltabothparents and len(parents) == 2:
            parents.sort()
            # To minimize the chance of having to build a fulltext,
            # pick first whichever parent is closest to us (max rev)
            yield (parents[1],)
            # then the other one (min rev) if the first did not fit
            yield (parents[0],)
        elif len(parents) > 0:
            # Test all parents (1 or 2), and keep the best candidate
            yield parents

    # NOTE: ``parents`` is only bound when gdelta is true, but ``sparse``
    # implies gdelta so the short-circuit below is safe.
    if sparse and parents:
        if snapshots is None:
            # map: base-rev: snapshot-rev
            snapshots = collections.defaultdict(list)
        # See if we can use an existing snapshot in the parent chains to use as
        # a base for a new intermediate-snapshot
        #
        # search for snapshot in parents delta chain
        # map: snapshot-level: snapshot-rev
        parents_snaps = collections.defaultdict(set)
        candidate_chains = [deltachain(p) for p in parents]
        for chain in candidate_chains:
            for idx, s in enumerate(chain):
                if not revlog.issnapshot(s):
                    break
                parents_snaps[idx].add(s)
        snapfloor = min(parents_snaps[0]) + 1
        _findsnapshots(revlog, snapshots, snapfloor)
        # search for the highest "unrelated" revision
        #
        # Adding snapshots used by "unrelated" revision increase the odd we
        # reuse an independent, yet better snapshot chain.
        #
        # XXX instead of building a set of revisions, we could lazily enumerate
        # over the chains. That would be more efficient, however we stick to
        # simple code for now.
        all_revs = set()
        for chain in candidate_chains:
            all_revs.update(chain)
        other = None
        for r in revlog.revs(prev, snapfloor):
            if r not in all_revs:
                other = r
                break
        if other is not None:
            # To avoid unfair competition, we won't use unrelated intermediate
            # snapshot that are deeper than the ones from the parent delta
            # chain.
            max_depth = max(parents_snaps.keys())
            chain = deltachain(other)
            for idx, s in enumerate(chain):
                if s < snapfloor:
                    continue
                if max_depth < idx:
                    break
                if not revlog.issnapshot(s):
                    break
                parents_snaps[idx].add(s)
        # Test them as possible intermediate snapshot base
        # We test them from highest to lowest level. High level one are more
        # likely to result in small delta
        floor = None
        for idx, snaps in sorted(parents_snaps.items(), reverse=True):
            siblings = set()
            for s in snaps:
                siblings.update(snapshots[s])
            # Before considering making a new intermediate snapshot, we check
            # if an existing snapshot, children of base we consider, would be
            # suitable.
            #
            # It give a chance to reuse a delta chain "unrelated" to the
            # current revision instead of starting our own. Without such
            # re-use, topological branches would keep reopening new chains.
            # Creating more and more snapshot as the repository grow.

            if floor is not None:
                # We only do this for siblings created after the one in our
                # parent's delta chain. Those created before has less chances
                # to be valid base since our ancestors had to create a new
                # snapshot.
                siblings = [r for r in siblings if floor < r]
            yield tuple(sorted(siblings))
            # then test the base from our parent's delta chain.
            yield tuple(sorted(snaps))
            floor = min(snaps)
        # No suitable base found in the parent chain, search if any full
        # snapshots emitted since parent's base would be a suitable base for an
        # intermediate snapshot.
        #
        # It give a chance to reuse a delta chain unrelated to the current
        # revisions instead of starting our own. Without such re-use,
        # topological branches would keep reopening new full chains. Creating
        # more and more snapshot as the repository grow.
        yield tuple(snapshots[nullrev])

    if not sparse:
        # other approach failed try against prev to hopefully save us a
        # fulltext.
        yield (prev,)
953
959
954
960
955 class deltacomputer:
961 class deltacomputer:
956 def __init__(
962 def __init__(
957 self,
963 self,
958 revlog,
964 revlog,
959 write_debug=None,
965 write_debug=None,
960 debug_search=False,
966 debug_search=False,
961 debug_info=None,
967 debug_info=None,
962 ):
968 ):
963 self.revlog = revlog
969 self.revlog = revlog
964 self._write_debug = write_debug
970 self._write_debug = write_debug
965 self._debug_search = debug_search
971 self._debug_search = debug_search
966 self._debug_info = debug_info
972 self._debug_info = debug_info
967
973
968 def buildtext(self, revinfo, fh):
974 def buildtext(self, revinfo, fh):
969 """Builds a fulltext version of a revision
975 """Builds a fulltext version of a revision
970
976
971 revinfo: revisioninfo instance that contains all needed info
977 revinfo: revisioninfo instance that contains all needed info
972 fh: file handle to either the .i or the .d revlog file,
978 fh: file handle to either the .i or the .d revlog file,
973 depending on whether it is inlined or not
979 depending on whether it is inlined or not
974 """
980 """
975 btext = revinfo.btext
981 btext = revinfo.btext
976 if btext[0] is not None:
982 if btext[0] is not None:
977 return btext[0]
983 return btext[0]
978
984
979 revlog = self.revlog
985 revlog = self.revlog
980 cachedelta = revinfo.cachedelta
986 cachedelta = revinfo.cachedelta
981 baserev = cachedelta[0]
987 baserev = cachedelta[0]
982 delta = cachedelta[1]
988 delta = cachedelta[1]
983
989
984 fulltext = btext[0] = _textfromdelta(
990 fulltext = btext[0] = _textfromdelta(
985 fh,
991 fh,
986 revlog,
992 revlog,
987 baserev,
993 baserev,
988 delta,
994 delta,
989 revinfo.p1,
995 revinfo.p1,
990 revinfo.p2,
996 revinfo.p2,
991 revinfo.flags,
997 revinfo.flags,
992 revinfo.node,
998 revinfo.node,
993 )
999 )
994 return fulltext
1000 return fulltext
995
1001
996 def _builddeltadiff(self, base, revinfo, fh):
1002 def _builddeltadiff(self, base, revinfo, fh):
997 revlog = self.revlog
1003 revlog = self.revlog
998 t = self.buildtext(revinfo, fh)
1004 t = self.buildtext(revinfo, fh)
999 if revlog.iscensored(base):
1005 if revlog.iscensored(base):
1000 # deltas based on a censored revision must replace the
1006 # deltas based on a censored revision must replace the
1001 # full content in one patch, so delta works everywhere
1007 # full content in one patch, so delta works everywhere
1002 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
1008 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
1003 delta = header + t
1009 delta = header + t
1004 else:
1010 else:
1005 ptext = revlog.rawdata(base, _df=fh)
1011 ptext = revlog.rawdata(base, _df=fh)
1006 delta = mdiff.textdiff(ptext, t)
1012 delta = mdiff.textdiff(ptext, t)
1007
1013
1008 return delta
1014 return delta
1009
1015
1010 def _builddeltainfo(self, revinfo, base, fh):
1016 def _builddeltainfo(self, revinfo, base, fh):
1011 # can we use the cached delta?
1017 # can we use the cached delta?
1012 revlog = self.revlog
1018 revlog = self.revlog
1013 debug_search = self._write_debug is not None and self._debug_search
1019 debug_search = self._write_debug is not None and self._debug_search
1014 chainbase = revlog.chainbase(base)
1020 chainbase = revlog.chainbase(base)
1015 if revlog._generaldelta:
1021 if revlog._generaldelta:
1016 deltabase = base
1022 deltabase = base
1017 else:
1023 else:
1018 deltabase = chainbase
1024 deltabase = chainbase
1019 snapshotdepth = None
1025 snapshotdepth = None
1020 if revlog._sparserevlog and deltabase == nullrev:
1026 if revlog._sparserevlog and deltabase == nullrev:
1021 snapshotdepth = 0
1027 snapshotdepth = 0
1022 elif revlog._sparserevlog and revlog.issnapshot(deltabase):
1028 elif revlog._sparserevlog and revlog.issnapshot(deltabase):
1023 # A delta chain should always be one full snapshot,
1029 # A delta chain should always be one full snapshot,
1024 # zero or more semi-snapshots, and zero or more deltas
1030 # zero or more semi-snapshots, and zero or more deltas
1025 p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
1031 p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
1026 if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
1032 if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
1027 snapshotdepth = len(revlog._deltachain(deltabase)[0])
1033 snapshotdepth = len(revlog._deltachain(deltabase)[0])
1028 delta = None
1034 delta = None
1029 if revinfo.cachedelta:
1035 if revinfo.cachedelta:
1030 cachebase, cachediff = revinfo.cachedelta
1036 cachebase, cachediff = revinfo.cachedelta
1031 # check if the diff still apply
1037 # check if the diff still apply
1032 currentbase = cachebase
1038 currentbase = cachebase
1033 while (
1039 while (
1034 currentbase != nullrev
1040 currentbase != nullrev
1035 and currentbase != base
1041 and currentbase != base
1036 and self.revlog.length(currentbase) == 0
1042 and self.revlog.length(currentbase) == 0
1037 ):
1043 ):
1038 currentbase = self.revlog.deltaparent(currentbase)
1044 currentbase = self.revlog.deltaparent(currentbase)
1039 if self.revlog._lazydelta and currentbase == base:
1045 if self.revlog._lazydelta and currentbase == base:
1040 delta = revinfo.cachedelta[1]
1046 delta = revinfo.cachedelta[1]
1041 if delta is None:
1047 if delta is None:
1042 delta = self._builddeltadiff(base, revinfo, fh)
1048 delta = self._builddeltadiff(base, revinfo, fh)
1043 if debug_search:
1049 if debug_search:
1044 msg = b"DBG-DELTAS-SEARCH: uncompressed-delta-size=%d\n"
1050 msg = b"DBG-DELTAS-SEARCH: uncompressed-delta-size=%d\n"
1045 msg %= len(delta)
1051 msg %= len(delta)
1046 self._write_debug(msg)
1052 self._write_debug(msg)
1047 # snapshotdept need to be neither None nor 0 level snapshot
1053 # snapshotdept need to be neither None nor 0 level snapshot
1048 if revlog.upperboundcomp is not None and snapshotdepth:
1054 if revlog.upperboundcomp is not None and snapshotdepth:
1049 lowestrealisticdeltalen = len(delta) // revlog.upperboundcomp
1055 lowestrealisticdeltalen = len(delta) // revlog.upperboundcomp
1050 snapshotlimit = revinfo.textlen >> snapshotdepth
1056 snapshotlimit = revinfo.textlen >> snapshotdepth
1051 if debug_search:
1057 if debug_search:
1052 msg = b"DBG-DELTAS-SEARCH: projected-lower-size=%d\n"
1058 msg = b"DBG-DELTAS-SEARCH: projected-lower-size=%d\n"
1053 msg %= lowestrealisticdeltalen
1059 msg %= lowestrealisticdeltalen
1054 self._write_debug(msg)
1060 self._write_debug(msg)
1055 if snapshotlimit < lowestrealisticdeltalen:
1061 if snapshotlimit < lowestrealisticdeltalen:
1056 if debug_search:
1062 if debug_search:
1057 msg = b"DBG-DELTAS-SEARCH: DISCARDED (snapshot limit)\n"
1063 msg = b"DBG-DELTAS-SEARCH: DISCARDED (snapshot limit)\n"
1058 self._write_debug(msg)
1064 self._write_debug(msg)
1059 return None
1065 return None
1060 if revlog.length(base) < lowestrealisticdeltalen:
1066 if revlog.length(base) < lowestrealisticdeltalen:
1061 if debug_search:
1067 if debug_search:
1062 msg = b"DBG-DELTAS-SEARCH: DISCARDED (prev size)\n"
1068 msg = b"DBG-DELTAS-SEARCH: DISCARDED (prev size)\n"
1063 self._write_debug(msg)
1069 self._write_debug(msg)
1064 return None
1070 return None
1065 header, data = revlog.compress(delta)
1071 header, data = revlog.compress(delta)
1066 deltalen = len(header) + len(data)
1072 deltalen = len(header) + len(data)
1067 offset = revlog.end(len(revlog) - 1)
1073 offset = revlog.end(len(revlog) - 1)
1068 dist = deltalen + offset - revlog.start(chainbase)
1074 dist = deltalen + offset - revlog.start(chainbase)
1069 chainlen, compresseddeltalen = revlog._chaininfo(base)
1075 chainlen, compresseddeltalen = revlog._chaininfo(base)
1070 chainlen += 1
1076 chainlen += 1
1071 compresseddeltalen += deltalen
1077 compresseddeltalen += deltalen
1072
1078
1073 return _deltainfo(
1079 return _deltainfo(
1074 dist,
1080 dist,
1075 deltalen,
1081 deltalen,
1076 (header, data),
1082 (header, data),
1077 deltabase,
1083 deltabase,
1078 chainbase,
1084 chainbase,
1079 chainlen,
1085 chainlen,
1080 compresseddeltalen,
1086 compresseddeltalen,
1081 snapshotdepth,
1087 snapshotdepth,
1082 )
1088 )
1083
1089
1084 def _fullsnapshotinfo(self, fh, revinfo, curr):
1090 def _fullsnapshotinfo(self, fh, revinfo, curr):
1085 rawtext = self.buildtext(revinfo, fh)
1091 rawtext = self.buildtext(revinfo, fh)
1086 data = self.revlog.compress(rawtext)
1092 data = self.revlog.compress(rawtext)
1087 compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
1093 compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
1088 deltabase = chainbase = curr
1094 deltabase = chainbase = curr
1089 snapshotdepth = 0
1095 snapshotdepth = 0
1090 chainlen = 1
1096 chainlen = 1
1091
1097
1092 return _deltainfo(
1098 return _deltainfo(
1093 dist,
1099 dist,
1094 deltalen,
1100 deltalen,
1095 data,
1101 data,
1096 deltabase,
1102 deltabase,
1097 chainbase,
1103 chainbase,
1098 chainlen,
1104 chainlen,
1099 compresseddeltalen,
1105 compresseddeltalen,
1100 snapshotdepth,
1106 snapshotdepth,
1101 )
1107 )
1102
1108
1103 def finddeltainfo(self, revinfo, fh, excluded_bases=None, target_rev=None):
1109 def finddeltainfo(self, revinfo, fh, excluded_bases=None, target_rev=None):
1104 """Find an acceptable delta against a candidate revision
1110 """Find an acceptable delta against a candidate revision
1105
1111
1106 revinfo: information about the revision (instance of _revisioninfo)
1112 revinfo: information about the revision (instance of _revisioninfo)
1107 fh: file handle to either the .i or the .d revlog file,
1113 fh: file handle to either the .i or the .d revlog file,
1108 depending on whether it is inlined or not
1114 depending on whether it is inlined or not
1109
1115
1110 Returns the first acceptable candidate revision, as ordered by
1116 Returns the first acceptable candidate revision, as ordered by
1111 _candidategroups
1117 _candidategroups
1112
1118
1113 If no suitable deltabase is found, we return delta info for a full
1119 If no suitable deltabase is found, we return delta info for a full
1114 snapshot.
1120 snapshot.
1115
1121
1116 `excluded_bases` is an optional set of revision that cannot be used as
1122 `excluded_bases` is an optional set of revision that cannot be used as
1117 a delta base. Use this to recompute delta suitable in censor or strip
1123 a delta base. Use this to recompute delta suitable in censor or strip
1118 context.
1124 context.
1119 """
1125 """
1120 if target_rev is None:
1126 if target_rev is None:
1121 target_rev = len(self.revlog)
1127 target_rev = len(self.revlog)
1122
1128
1123 if not revinfo.textlen:
1129 if not revinfo.textlen:
1124 return self._fullsnapshotinfo(fh, revinfo, target_rev)
1130 return self._fullsnapshotinfo(fh, revinfo, target_rev)
1125
1131
1126 if excluded_bases is None:
1132 if excluded_bases is None:
1127 excluded_bases = set()
1133 excluded_bases = set()
1128
1134
1129 # no delta for flag processor revision (see "candelta" for why)
1135 # no delta for flag processor revision (see "candelta" for why)
1130 # not calling candelta since only one revision needs test, also to
1136 # not calling candelta since only one revision needs test, also to
1131 # avoid overhead fetching flags again.
1137 # avoid overhead fetching flags again.
1132 if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
1138 if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
1133 return self._fullsnapshotinfo(fh, revinfo, target_rev)
1139 return self._fullsnapshotinfo(fh, revinfo, target_rev)
1134
1140
1135 gather_debug = (
1141 gather_debug = (
1136 self._write_debug is not None or self._debug_info is not None
1142 self._write_debug is not None or self._debug_info is not None
1137 )
1143 )
1138 debug_search = self._write_debug is not None and self._debug_search
1144 debug_search = self._write_debug is not None and self._debug_search
1139
1145
1140 if gather_debug:
1146 if gather_debug:
1141 start = util.timer()
1147 start = util.timer()
1142
1148
1143 # count the number of different delta we tried (for debug purpose)
1149 # count the number of different delta we tried (for debug purpose)
1144 dbg_try_count = 0
1150 dbg_try_count = 0
1145 # count the number of "search round" we did. (for debug purpose)
1151 # count the number of "search round" we did. (for debug purpose)
1146 dbg_try_rounds = 0
1152 dbg_try_rounds = 0
1147 dbg_type = b'unknown'
1153 dbg_type = b'unknown'
1148
1154
1149 cachedelta = revinfo.cachedelta
1155 cachedelta = revinfo.cachedelta
1150 p1 = revinfo.p1
1156 p1 = revinfo.p1
1151 p2 = revinfo.p2
1157 p2 = revinfo.p2
1152 revlog = self.revlog
1158 revlog = self.revlog
1153
1159
1154 deltainfo = None
1160 deltainfo = None
1155 p1r, p2r = revlog.rev(p1), revlog.rev(p2)
1161 p1r, p2r = revlog.rev(p1), revlog.rev(p2)
1156
1162
1157 if gather_debug:
1163 if gather_debug:
1158 if p1r != nullrev:
1164 if p1r != nullrev:
1159 p1_chain_len = revlog._chaininfo(p1r)[0]
1165 p1_chain_len = revlog._chaininfo(p1r)[0]
1160 else:
1166 else:
1161 p1_chain_len = -1
1167 p1_chain_len = -1
1162 if p2r != nullrev:
1168 if p2r != nullrev:
1163 p2_chain_len = revlog._chaininfo(p2r)[0]
1169 p2_chain_len = revlog._chaininfo(p2r)[0]
1164 else:
1170 else:
1165 p2_chain_len = -1
1171 p2_chain_len = -1
1166 if debug_search:
1172 if debug_search:
1167 msg = b"DBG-DELTAS-SEARCH: SEARCH rev=%d\n"
1173 msg = b"DBG-DELTAS-SEARCH: SEARCH rev=%d\n"
1168 msg %= target_rev
1174 msg %= target_rev
1169 self._write_debug(msg)
1175 self._write_debug(msg)
1170
1176
1171 groups = _candidategroups(
1177 groups = _candidategroups(
1172 self.revlog, revinfo.textlen, p1r, p2r, cachedelta
1178 self.revlog,
1179 revinfo.textlen,
1180 p1r,
1181 p2r,
1182 cachedelta,
1173 )
1183 )
1174 candidaterevs = next(groups)
1184 candidaterevs = next(groups)
1175 while candidaterevs is not None:
1185 while candidaterevs is not None:
1176 dbg_try_rounds += 1
1186 dbg_try_rounds += 1
1177 if debug_search:
1187 if debug_search:
1178 prev = None
1188 prev = None
1179 if deltainfo is not None:
1189 if deltainfo is not None:
1180 prev = deltainfo.base
1190 prev = deltainfo.base
1181
1191
1182 if (
1192 if (
1183 cachedelta is not None
1193 cachedelta is not None
1184 and len(candidaterevs) == 1
1194 and len(candidaterevs) == 1
1185 and cachedelta[0] in candidaterevs
1195 and cachedelta[0] in candidaterevs
1186 ):
1196 ):
1187 round_type = b"cached-delta"
1197 round_type = b"cached-delta"
1188 elif p1 in candidaterevs or p2 in candidaterevs:
1198 elif p1 in candidaterevs or p2 in candidaterevs:
1189 round_type = b"parents"
1199 round_type = b"parents"
1190 elif prev is not None and all(c < prev for c in candidaterevs):
1200 elif prev is not None and all(c < prev for c in candidaterevs):
1191 round_type = b"refine-down"
1201 round_type = b"refine-down"
1192 elif prev is not None and all(c > prev for c in candidaterevs):
1202 elif prev is not None and all(c > prev for c in candidaterevs):
1193 round_type = b"refine-up"
1203 round_type = b"refine-up"
1194 else:
1204 else:
1195 round_type = b"search-down"
1205 round_type = b"search-down"
1196 msg = b"DBG-DELTAS-SEARCH: ROUND #%d - %d candidates - %s\n"
1206 msg = b"DBG-DELTAS-SEARCH: ROUND #%d - %d candidates - %s\n"
1197 msg %= (dbg_try_rounds, len(candidaterevs), round_type)
1207 msg %= (dbg_try_rounds, len(candidaterevs), round_type)
1198 self._write_debug(msg)
1208 self._write_debug(msg)
1199 nominateddeltas = []
1209 nominateddeltas = []
1200 if deltainfo is not None:
1210 if deltainfo is not None:
1201 if debug_search:
1211 if debug_search:
1202 msg = (
1212 msg = (
1203 b"DBG-DELTAS-SEARCH: CONTENDER: rev=%d - length=%d\n"
1213 b"DBG-DELTAS-SEARCH: CONTENDER: rev=%d - length=%d\n"
1204 )
1214 )
1205 msg %= (deltainfo.base, deltainfo.deltalen)
1215 msg %= (deltainfo.base, deltainfo.deltalen)
1206 self._write_debug(msg)
1216 self._write_debug(msg)
1207 # if we already found a good delta,
1217 # if we already found a good delta,
1208 # challenge it against refined candidates
1218 # challenge it against refined candidates
1209 nominateddeltas.append(deltainfo)
1219 nominateddeltas.append(deltainfo)
1210 for candidaterev in candidaterevs:
1220 for candidaterev in candidaterevs:
1211 if debug_search:
1221 if debug_search:
1212 msg = b"DBG-DELTAS-SEARCH: CANDIDATE: rev=%d\n"
1222 msg = b"DBG-DELTAS-SEARCH: CANDIDATE: rev=%d\n"
1213 msg %= candidaterev
1223 msg %= candidaterev
1214 self._write_debug(msg)
1224 self._write_debug(msg)
1215 candidate_type = None
1225 candidate_type = None
1216 if candidaterev == p1:
1226 if candidaterev == p1:
1217 candidate_type = b"p1"
1227 candidate_type = b"p1"
1218 elif candidaterev == p2:
1228 elif candidaterev == p2:
1219 candidate_type = b"p2"
1229 candidate_type = b"p2"
1220 elif self.revlog.issnapshot(candidaterev):
1230 elif self.revlog.issnapshot(candidaterev):
1221 candidate_type = b"snapshot-%d"
1231 candidate_type = b"snapshot-%d"
1222 candidate_type %= self.revlog.snapshotdepth(
1232 candidate_type %= self.revlog.snapshotdepth(
1223 candidaterev
1233 candidaterev
1224 )
1234 )
1225
1235
1226 if candidate_type is not None:
1236 if candidate_type is not None:
1227 msg = b"DBG-DELTAS-SEARCH: type=%s\n"
1237 msg = b"DBG-DELTAS-SEARCH: type=%s\n"
1228 msg %= candidate_type
1238 msg %= candidate_type
1229 self._write_debug(msg)
1239 self._write_debug(msg)
1230 msg = b"DBG-DELTAS-SEARCH: size=%d\n"
1240 msg = b"DBG-DELTAS-SEARCH: size=%d\n"
1231 msg %= self.revlog.length(candidaterev)
1241 msg %= self.revlog.length(candidaterev)
1232 self._write_debug(msg)
1242 self._write_debug(msg)
1233 msg = b"DBG-DELTAS-SEARCH: base=%d\n"
1243 msg = b"DBG-DELTAS-SEARCH: base=%d\n"
1234 msg %= self.revlog.deltaparent(candidaterev)
1244 msg %= self.revlog.deltaparent(candidaterev)
1235 self._write_debug(msg)
1245 self._write_debug(msg)
1236 if candidaterev in excluded_bases:
1246 if candidaterev in excluded_bases:
1237 if debug_search:
1247 if debug_search:
1238 msg = b"DBG-DELTAS-SEARCH: EXCLUDED\n"
1248 msg = b"DBG-DELTAS-SEARCH: EXCLUDED\n"
1239 self._write_debug(msg)
1249 self._write_debug(msg)
1240 continue
1250 continue
1241 if candidaterev >= target_rev:
1251 if candidaterev >= target_rev:
1242 if debug_search:
1252 if debug_search:
1243 msg = b"DBG-DELTAS-SEARCH: TOO-HIGH\n"
1253 msg = b"DBG-DELTAS-SEARCH: TOO-HIGH\n"
1244 self._write_debug(msg)
1254 self._write_debug(msg)
1245 continue
1255 continue
1246 dbg_try_count += 1
1256 dbg_try_count += 1
1247
1257
1248 if debug_search:
1258 if debug_search:
1249 delta_start = util.timer()
1259 delta_start = util.timer()
1250 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
1260 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
1251 if debug_search:
1261 if debug_search:
1252 delta_end = util.timer()
1262 delta_end = util.timer()
1253 msg = b"DBG-DELTAS-SEARCH: delta-search-time=%f\n"
1263 msg = b"DBG-DELTAS-SEARCH: delta-search-time=%f\n"
1254 msg %= delta_end - delta_start
1264 msg %= delta_end - delta_start
1255 self._write_debug(msg)
1265 self._write_debug(msg)
1256 if candidatedelta is not None:
1266 if candidatedelta is not None:
1257 if isgooddeltainfo(self.revlog, candidatedelta, revinfo):
1267 if isgooddeltainfo(self.revlog, candidatedelta, revinfo):
1258 if debug_search:
1268 if debug_search:
1259 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (GOOD)\n"
1269 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (GOOD)\n"
1260 msg %= candidatedelta.deltalen
1270 msg %= candidatedelta.deltalen
1261 self._write_debug(msg)
1271 self._write_debug(msg)
1262 nominateddeltas.append(candidatedelta)
1272 nominateddeltas.append(candidatedelta)
1263 elif debug_search:
1273 elif debug_search:
1264 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (BAD)\n"
1274 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (BAD)\n"
1265 msg %= candidatedelta.deltalen
1275 msg %= candidatedelta.deltalen
1266 self._write_debug(msg)
1276 self._write_debug(msg)
1267 elif debug_search:
1277 elif debug_search:
1268 msg = b"DBG-DELTAS-SEARCH: NO-DELTA\n"
1278 msg = b"DBG-DELTAS-SEARCH: NO-DELTA\n"
1269 self._write_debug(msg)
1279 self._write_debug(msg)
1270 if nominateddeltas:
1280 if nominateddeltas:
1271 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
1281 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
1272 if deltainfo is not None:
1282 if deltainfo is not None:
1273 candidaterevs = groups.send(deltainfo.base)
1283 candidaterevs = groups.send(deltainfo.base)
1274 else:
1284 else:
1275 candidaterevs = next(groups)
1285 candidaterevs = next(groups)
1276
1286
1277 if deltainfo is None:
1287 if deltainfo is None:
1278 dbg_type = b"full"
1288 dbg_type = b"full"
1279 deltainfo = self._fullsnapshotinfo(fh, revinfo, target_rev)
1289 deltainfo = self._fullsnapshotinfo(fh, revinfo, target_rev)
1280 elif deltainfo.snapshotdepth: # pytype: disable=attribute-error
1290 elif deltainfo.snapshotdepth: # pytype: disable=attribute-error
1281 dbg_type = b"snapshot"
1291 dbg_type = b"snapshot"
1282 else:
1292 else:
1283 dbg_type = b"delta"
1293 dbg_type = b"delta"
1284
1294
1285 if gather_debug:
1295 if gather_debug:
1286 end = util.timer()
1296 end = util.timer()
1287 used_cached = (
1297 used_cached = (
1288 cachedelta is not None
1298 cachedelta is not None
1289 and dbg_try_rounds == 1
1299 and dbg_try_rounds == 1
1290 and dbg_try_count == 1
1300 and dbg_try_count == 1
1291 and deltainfo.base == cachedelta[0]
1301 and deltainfo.base == cachedelta[0]
1292 )
1302 )
1293 dbg = {
1303 dbg = {
1294 'duration': end - start,
1304 'duration': end - start,
1295 'revision': target_rev,
1305 'revision': target_rev,
1296 'delta-base': deltainfo.base, # pytype: disable=attribute-error
1306 'delta-base': deltainfo.base, # pytype: disable=attribute-error
1297 'search_round_count': dbg_try_rounds,
1307 'search_round_count': dbg_try_rounds,
1298 'using-cached-base': used_cached,
1308 'using-cached-base': used_cached,
1299 'delta_try_count': dbg_try_count,
1309 'delta_try_count': dbg_try_count,
1300 'type': dbg_type,
1310 'type': dbg_type,
1301 'p1-chain-len': p1_chain_len,
1311 'p1-chain-len': p1_chain_len,
1302 'p2-chain-len': p2_chain_len,
1312 'p2-chain-len': p2_chain_len,
1303 }
1313 }
1304 if (
1314 if (
1305 deltainfo.snapshotdepth # pytype: disable=attribute-error
1315 deltainfo.snapshotdepth # pytype: disable=attribute-error
1306 is not None
1316 is not None
1307 ):
1317 ):
1308 dbg[
1318 dbg[
1309 'snapshot-depth'
1319 'snapshot-depth'
1310 ] = deltainfo.snapshotdepth # pytype: disable=attribute-error
1320 ] = deltainfo.snapshotdepth # pytype: disable=attribute-error
1311 else:
1321 else:
1312 dbg['snapshot-depth'] = 0
1322 dbg['snapshot-depth'] = 0
1313 target_revlog = b"UNKNOWN"
1323 target_revlog = b"UNKNOWN"
1314 target_type = self.revlog.target[0]
1324 target_type = self.revlog.target[0]
1315 target_key = self.revlog.target[1]
1325 target_key = self.revlog.target[1]
1316 if target_type == KIND_CHANGELOG:
1326 if target_type == KIND_CHANGELOG:
1317 target_revlog = b'CHANGELOG:'
1327 target_revlog = b'CHANGELOG:'
1318 elif target_type == KIND_MANIFESTLOG:
1328 elif target_type == KIND_MANIFESTLOG:
1319 target_revlog = b'MANIFESTLOG:'
1329 target_revlog = b'MANIFESTLOG:'
1320 if target_key:
1330 if target_key:
1321 target_revlog += b'%s:' % target_key
1331 target_revlog += b'%s:' % target_key
1322 elif target_type == KIND_FILELOG:
1332 elif target_type == KIND_FILELOG:
1323 target_revlog = b'FILELOG:'
1333 target_revlog = b'FILELOG:'
1324 if target_key:
1334 if target_key:
1325 target_revlog += b'%s:' % target_key
1335 target_revlog += b'%s:' % target_key
1326 dbg['target-revlog'] = target_revlog
1336 dbg['target-revlog'] = target_revlog
1327
1337
1328 if self._debug_info is not None:
1338 if self._debug_info is not None:
1329 self._debug_info.append(dbg)
1339 self._debug_info.append(dbg)
1330
1340
1331 if self._write_debug is not None:
1341 if self._write_debug is not None:
1332 msg = (
1342 msg = (
1333 b"DBG-DELTAS:"
1343 b"DBG-DELTAS:"
1334 b" %-12s"
1344 b" %-12s"
1335 b" rev=%d:"
1345 b" rev=%d:"
1336 b" delta-base=%d"
1346 b" delta-base=%d"
1337 b" is-cached=%d"
1347 b" is-cached=%d"
1338 b" - search-rounds=%d"
1348 b" - search-rounds=%d"
1339 b" try-count=%d"
1349 b" try-count=%d"
1340 b" - delta-type=%-6s"
1350 b" - delta-type=%-6s"
1341 b" snap-depth=%d"
1351 b" snap-depth=%d"
1342 b" - p1-chain-length=%d"
1352 b" - p1-chain-length=%d"
1343 b" p2-chain-length=%d"
1353 b" p2-chain-length=%d"
1344 b" - duration=%f"
1354 b" - duration=%f"
1345 b"\n"
1355 b"\n"
1346 )
1356 )
1347 msg %= (
1357 msg %= (
1348 dbg["target-revlog"],
1358 dbg["target-revlog"],
1349 dbg["revision"],
1359 dbg["revision"],
1350 dbg["delta-base"],
1360 dbg["delta-base"],
1351 dbg["using-cached-base"],
1361 dbg["using-cached-base"],
1352 dbg["search_round_count"],
1362 dbg["search_round_count"],
1353 dbg["delta_try_count"],
1363 dbg["delta_try_count"],
1354 dbg["type"],
1364 dbg["type"],
1355 dbg["snapshot-depth"],
1365 dbg["snapshot-depth"],
1356 dbg["p1-chain-len"],
1366 dbg["p1-chain-len"],
1357 dbg["p2-chain-len"],
1367 dbg["p2-chain-len"],
1358 dbg["duration"],
1368 dbg["duration"],
1359 )
1369 )
1360 self._write_debug(msg)
1370 self._write_debug(msg)
1361 return deltainfo
1371 return deltainfo
1362
1372
1363
1373
1364 def delta_compression(default_compression_header, deltainfo):
1374 def delta_compression(default_compression_header, deltainfo):
1365 """return (COMPRESSION_MODE, deltainfo)
1375 """return (COMPRESSION_MODE, deltainfo)
1366
1376
1367 used by revlog v2+ format to dispatch between PLAIN and DEFAULT
1377 used by revlog v2+ format to dispatch between PLAIN and DEFAULT
1368 compression.
1378 compression.
1369 """
1379 """
1370 h, d = deltainfo.data
1380 h, d = deltainfo.data
1371 compression_mode = COMP_MODE_INLINE
1381 compression_mode = COMP_MODE_INLINE
1372 if not h and not d:
1382 if not h and not d:
1373 # not data to store at all... declare them uncompressed
1383 # not data to store at all... declare them uncompressed
1374 compression_mode = COMP_MODE_PLAIN
1384 compression_mode = COMP_MODE_PLAIN
1375 elif not h:
1385 elif not h:
1376 t = d[0:1]
1386 t = d[0:1]
1377 if t == b'\0':
1387 if t == b'\0':
1378 compression_mode = COMP_MODE_PLAIN
1388 compression_mode = COMP_MODE_PLAIN
1379 elif t == default_compression_header:
1389 elif t == default_compression_header:
1380 compression_mode = COMP_MODE_DEFAULT
1390 compression_mode = COMP_MODE_DEFAULT
1381 elif h == b'u':
1391 elif h == b'u':
1382 # we have a more efficient way to declare uncompressed
1392 # we have a more efficient way to declare uncompressed
1383 h = b''
1393 h = b''
1384 compression_mode = COMP_MODE_PLAIN
1394 compression_mode = COMP_MODE_PLAIN
1385 deltainfo = drop_u_compression(deltainfo)
1395 deltainfo = drop_u_compression(deltainfo)
1386 return compression_mode, deltainfo
1396 return compression_mode, deltainfo
General Comments 0
You need to be logged in to leave comments. Login now