##// END OF EJS Templates
bundle: when forcing acceptance of incoming delta also accept snapshot...
marmoute -
r50677:acdb9a15 default
parent child Browse files
Show More
@@ -1,1502 +1,1511 b''
1 # revlogdeltas.py - Logic around delta computation for revlog
1 # revlogdeltas.py - Logic around delta computation for revlog
2 #
2 #
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 # Copyright 2018 Octobus <contact@octobus.net>
4 # Copyright 2018 Octobus <contact@octobus.net>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8 """Helper class to compute deltas stored inside revlogs"""
8 """Helper class to compute deltas stored inside revlogs"""
9
9
10
10
11 import collections
11 import collections
12 import struct
12 import struct
13
13
14 # import stuff from node for others to import from revlog
14 # import stuff from node for others to import from revlog
15 from ..node import nullrev
15 from ..node import nullrev
16 from ..i18n import _
16 from ..i18n import _
17 from ..pycompat import getattr
17 from ..pycompat import getattr
18
18
19 from .constants import (
19 from .constants import (
20 COMP_MODE_DEFAULT,
20 COMP_MODE_DEFAULT,
21 COMP_MODE_INLINE,
21 COMP_MODE_INLINE,
22 COMP_MODE_PLAIN,
22 COMP_MODE_PLAIN,
23 DELTA_BASE_REUSE_FORCE,
23 DELTA_BASE_REUSE_FORCE,
24 DELTA_BASE_REUSE_NO,
24 DELTA_BASE_REUSE_NO,
25 KIND_CHANGELOG,
25 KIND_CHANGELOG,
26 KIND_FILELOG,
26 KIND_FILELOG,
27 KIND_MANIFESTLOG,
27 KIND_MANIFESTLOG,
28 REVIDX_ISCENSORED,
28 REVIDX_ISCENSORED,
29 REVIDX_RAWTEXT_CHANGING_FLAGS,
29 REVIDX_RAWTEXT_CHANGING_FLAGS,
30 )
30 )
31
31
32 from ..thirdparty import attr
32 from ..thirdparty import attr
33
33
34 from .. import (
34 from .. import (
35 error,
35 error,
36 mdiff,
36 mdiff,
37 util,
37 util,
38 )
38 )
39
39
40 from . import flagutil
40 from . import flagutil
41
41
42 # maximum <delta-chain-data>/<revision-text-length> ratio
42 # maximum <delta-chain-data>/<revision-text-length> ratio
43 LIMIT_DELTA2TEXT = 2
43 LIMIT_DELTA2TEXT = 2
44
44
45
45
46 class _testrevlog:
46 class _testrevlog:
47 """minimalist fake revlog to use in doctests"""
47 """minimalist fake revlog to use in doctests"""
48
48
49 def __init__(self, data, density=0.5, mingap=0, snapshot=()):
49 def __init__(self, data, density=0.5, mingap=0, snapshot=()):
50 """data is an list of revision payload boundaries"""
50 """data is an list of revision payload boundaries"""
51 self._data = data
51 self._data = data
52 self._srdensitythreshold = density
52 self._srdensitythreshold = density
53 self._srmingapsize = mingap
53 self._srmingapsize = mingap
54 self._snapshot = set(snapshot)
54 self._snapshot = set(snapshot)
55 self.index = None
55 self.index = None
56
56
57 def start(self, rev):
57 def start(self, rev):
58 if rev == nullrev:
58 if rev == nullrev:
59 return 0
59 return 0
60 if rev == 0:
60 if rev == 0:
61 return 0
61 return 0
62 return self._data[rev - 1]
62 return self._data[rev - 1]
63
63
64 def end(self, rev):
64 def end(self, rev):
65 if rev == nullrev:
65 if rev == nullrev:
66 return 0
66 return 0
67 return self._data[rev]
67 return self._data[rev]
68
68
69 def length(self, rev):
69 def length(self, rev):
70 return self.end(rev) - self.start(rev)
70 return self.end(rev) - self.start(rev)
71
71
72 def __len__(self):
72 def __len__(self):
73 return len(self._data)
73 return len(self._data)
74
74
75 def issnapshot(self, rev):
75 def issnapshot(self, rev):
76 if rev == nullrev:
76 if rev == nullrev:
77 return True
77 return True
78 return rev in self._snapshot
78 return rev in self._snapshot
79
79
80
80
def slicechunk(revlog, revs, targetsize=None):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `revlog._srdensitythreshold`. No gap smaller than
    `revlog._srmingapsize` is skipped.

    If `targetsize` is set, no chunk larger than `targetsize` will be yield.
    For consistency with other slicing choice, this limit won't go lower than
    `revlog._srmingapsize`.

    If individual revisions chunk are larger than this limit, they will still
    be raised individually.

    >>> data = [
    ...  5, #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ]
    >>> revlog = _testrevlog(data, snapshot=range(16))

    >>> list(slicechunk(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(slicechunk(revlog, [0, 15]))
    [[0], [15]]
    >>> list(slicechunk(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]

    Slicing with a maximum chunk size
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
    [[0], [11], [13], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
    [[0], [11], [13, 15]]

    Slicing involving nullrev
    >>> list(slicechunk(revlog, [-1, 0, 11, 13, 15], targetsize=20))
    [[-1, 0], [11], [13, 15]]
    >>> list(slicechunk(revlog, [-1, 13, 15], targetsize=5))
    [[-1], [13], [15]]
    """
    if targetsize is not None:
        # never slice below the minimal gap size used by density slicing
        targetsize = max(targetsize, revlog._srmingapsize)
    # targetsize should not be specified when evaluating delta candidates:
    # * targetsize is used to ensure we stay within specification when reading,
    density_fn = getattr(revlog.index, 'slicechunktodensity', None)
    if density_fn is None:
        # no native (C/Rust) implementation available: fall back to the pure
        # Python density slicer defined in this module
        def density_fn(r, d, m):
            return _slicechunktodensity(revlog, r, d, m)

    threshold = revlog._srdensitythreshold
    mingap = revlog._srmingapsize
    for chunk in density_fn(revs, threshold, mingap):
        # further split density-selected chunks that exceed the read target
        yield from _slicechunktosize(revlog, chunk, targetsize)
153
153
154
154
def _slicechunktosize(revlog, revs, targetsize=None):
    """slice revs to match the target size

    This is intended to be used on chunk that density slicing selected by that
    are still too large compared to the read garantee of revlog. This might
    happens when "minimal gap size" interrupted the slicing or when chain are
    built in a way that create large blocks next to each other.

    >>> data = [
    ...  3, #0 (3)
    ...  5, #1 (2)
    ...  6, #2 (1)
    ...  8, #3 (2)
    ...  8, #4 (empty)
    ...  11, #5 (3)
    ...  12, #6 (1)
    ...  13, #7 (1)
    ...  14, #8 (1)
    ... ]

    == All snapshots cases ==
    >>> revlog = _testrevlog(data, snapshot=range(9))

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1, 2], [3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]

    == No Snapshot cases ==
    >>> revlog = _testrevlog(data)

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1], [2, 3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [4, 5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]

    == mixed case ==
    >>> revlog = _testrevlog(data, snapshot=[0, 1, 2])
    >>> list(_slicechunktosize(revlog, list(range(9)), 5))
    [[0, 1], [2], [3, 4, 5], [6, 7, 8]]
    """
    assert targetsize is None or 0 <= targetsize
    startdata = revlog.start(revs[0])
    enddata = revlog.end(revs[-1])
    fullspan = enddata - startdata
    # the whole span already fits within one read of `targetsize`: no slicing
    if targetsize is None or fullspan <= targetsize:
        yield revs
        return

    startrevidx = 0
    endrevidx = 1
    iterrevs = enumerate(revs)
    next(iterrevs)  # skip first rev.
    # first step: get snapshots out of the way
    for idx, r in iterrevs:
        span = revlog.end(r) - startdata
        snapshot = revlog.issnapshot(r)
        if span <= targetsize and snapshot:
            # this snapshot still fits in the current chunk: extend the chunk
            endrevidx = idx + 1
        else:
            # chunk would grow past `targetsize` (or `r` is not a snapshot):
            # emit what was gathered so far and restart from this revision
            chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
            if chunk:
                yield chunk
            startrevidx = idx
            startdata = revlog.start(r)
            endrevidx = idx + 1
        if not snapshot:
            # remaining revisions are handled by the binary slicing below
            break

    # for the others, we use binary slicing to quickly converge toward valid
    # chunks (otherwise, we might end up looking for start/end of many
    # revisions). This logic is not looking for the perfect slicing point, it
    # focuses on quickly converging toward valid chunks.
    nbitem = len(revs)
    while (enddata - startdata) > targetsize:
        endrevidx = nbitem
        if nbitem - startrevidx <= 1:
            break  # protect against individual chunk larger than limit
        localenddata = revlog.end(revs[endrevidx - 1])
        span = localenddata - startdata
        # halve the candidate range until its byte span fits `targetsize`
        while span > targetsize:
            if endrevidx - startrevidx <= 1:
                break  # protect against individual chunk larger than limit
            endrevidx -= (endrevidx - startrevidx) // 2
            localenddata = revlog.end(revs[endrevidx - 1])
            span = localenddata - startdata
        chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
        if chunk:
            yield chunk
        startrevidx = endrevidx
        startdata = revlog.start(revs[startrevidx])

    # emit whatever remains after the last cut
    chunk = _trimchunk(revlog, revs, startrevidx)
    if chunk:
        yield chunk
308
308
309
309
def _slicechunktodensity(revlog, revs, targetdensity=0.5, mingapsize=0):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
    skipped.

    >>> revlog = _testrevlog([
    ...  5, #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ])

    >>> list(_slicechunktodensity(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(_slicechunktodensity(revlog, [0, 15]))
    [[0], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           mingapsize=20))
    [[1, 2, 3, 5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95))
    [[1, 2], [5], [8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95, mingapsize=12))
    [[1, 2], [5, 8, 10, 11], [14]]
    """
    start = revlog.start
    length = revlog.length

    # a single revision is always dense enough on its own
    if len(revs) <= 1:
        yield revs
        return

    deltachainspan = segmentspan(revlog, revs)

    # span smaller than the minimal gap: not worth slicing at all
    if deltachainspan < mingapsize:
        yield revs
        return

    readdata = deltachainspan
    chainpayload = sum(length(r) for r in revs)

    if deltachainspan:
        density = chainpayload / float(deltachainspan)
    else:
        density = 1.0

    if density >= targetdensity:
        yield revs
        return

    # Collect the gaps between consecutive non-empty revisions as
    # (size, index-after-gap) pairs
    gaps = []
    prevend = None
    for i, rev in enumerate(revs):
        revstart = start(rev)
        revlen = length(rev)

        # Skip empty revisions to form larger holes
        if revlen == 0:
            continue

        if prevend is not None:
            gapsize = revstart - prevend
            # only consider holes that are large enough
            if gapsize > mingapsize:
                gaps.append((gapsize, i))

        prevend = revstart + revlen
    # sort the gaps ascending, so list.pop() below yields the largest first
    gaps.sort()

    # Collect the indices of the largest holes until the density is acceptable
    selected = []
    while gaps and density < targetdensity:
        gapsize, gapidx = gaps.pop()

        selected.append(gapidx)

        # skipping this gap shrinks the amount of data actually read, which
        # raises the density of what remains
        readdata -= gapsize
        if readdata > 0:
            density = chainpayload / float(readdata)
        else:
            density = 1.0
    selected.sort()

    # Cut the revs at collected indices
    previdx = 0
    for idx in selected:

        chunk = _trimchunk(revlog, revs, previdx, idx)
        if chunk:
            yield chunk

        previdx = idx

    # emit the final chunk after the last selected cut point
    chunk = _trimchunk(revlog, revs, previdx)
    if chunk:
        yield chunk
434
434
435
435
436 def _trimchunk(revlog, revs, startidx, endidx=None):
436 def _trimchunk(revlog, revs, startidx, endidx=None):
437 """returns revs[startidx:endidx] without empty trailing revs
437 """returns revs[startidx:endidx] without empty trailing revs
438
438
439 Doctest Setup
439 Doctest Setup
440 >>> revlog = _testrevlog([
440 >>> revlog = _testrevlog([
441 ... 5, #0
441 ... 5, #0
442 ... 10, #1
442 ... 10, #1
443 ... 12, #2
443 ... 12, #2
444 ... 12, #3 (empty)
444 ... 12, #3 (empty)
445 ... 17, #4
445 ... 17, #4
446 ... 21, #5
446 ... 21, #5
447 ... 21, #6 (empty)
447 ... 21, #6 (empty)
448 ... ])
448 ... ])
449
449
450 Contiguous cases:
450 Contiguous cases:
451 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
451 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
452 [0, 1, 2, 3, 4, 5]
452 [0, 1, 2, 3, 4, 5]
453 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
453 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
454 [0, 1, 2, 3, 4]
454 [0, 1, 2, 3, 4]
455 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
455 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
456 [0, 1, 2]
456 [0, 1, 2]
457 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
457 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
458 [2]
458 [2]
459 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
459 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
460 [3, 4, 5]
460 [3, 4, 5]
461 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
461 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
462 [3, 4]
462 [3, 4]
463
463
464 Discontiguous cases:
464 Discontiguous cases:
465 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
465 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
466 [1, 3, 5]
466 [1, 3, 5]
467 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
467 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
468 [1]
468 [1]
469 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
469 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
470 [3, 5]
470 [3, 5]
471 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
471 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
472 [3, 5]
472 [3, 5]
473 """
473 """
474 length = revlog.length
474 length = revlog.length
475
475
476 if endidx is None:
476 if endidx is None:
477 endidx = len(revs)
477 endidx = len(revs)
478
478
479 # If we have a non-emtpy delta candidate, there are nothing to trim
479 # If we have a non-emtpy delta candidate, there are nothing to trim
480 if revs[endidx - 1] < len(revlog):
480 if revs[endidx - 1] < len(revlog):
481 # Trim empty revs at the end, except the very first revision of a chain
481 # Trim empty revs at the end, except the very first revision of a chain
482 while (
482 while (
483 endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0
483 endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0
484 ):
484 ):
485 endidx -= 1
485 endidx -= 1
486
486
487 return revs[startidx:endidx]
487 return revs[startidx:endidx]
488
488
489
489
def segmentspan(revlog, revs):
    """Get the byte span of a segment of revisions

    revs is a sorted array of revision numbers

    >>> revlog = _testrevlog([
    ...  5, #0
    ...  10, #1
    ...  12, #2
    ...  12, #3 (empty)
    ...  17, #4
    ... ])

    >>> segmentspan(revlog, [0, 1, 2, 3, 4])
    17
    >>> segmentspan(revlog, [0, 4])
    17
    >>> segmentspan(revlog, [3, 4])
    5
    >>> segmentspan(revlog, [1, 2, 3,])
    7
    >>> segmentspan(revlog, [1, 3])
    7
    """
    # an empty segment spans no bytes at all
    if not revs:
        return 0
    # distance from the start of the first rev to the end of the last one
    return revlog.end(revs[-1]) - revlog.start(revs[0])
518
518
519
519
def _textfromdelta(fh, revlog, baserev, delta, p1, p2, flags, expectednode):
    """build full text from a (base, delta) pair and other metadata"""
    # special case deltas which replace entire base; no need to decode
    # base revision. this neatly avoids censored bases, which throw when
    # they're decoded.
    hlen = struct.calcsize(b">lll")
    fullreplace = mdiff.replacediffheader(
        revlog.rawsize(baserev), len(delta) - hlen
    )
    if delta[:hlen] == fullreplace:
        fulltext = delta[hlen:]
    else:
        # deltabase is rawtext before changed by flag processors, which is
        # equivalent to non-raw text
        basetext = revlog.revision(baserev, _df=fh)
        fulltext = mdiff.patch(basetext, delta)

    try:
        validatehash = flagutil.processflagsraw(revlog, fulltext, flags)
        if validatehash:
            revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
        # a censored flag without a CensoredNodeError means the content does
        # not actually match censorship expectations
        if flags & REVIDX_ISCENSORED:
            raise error.StorageError(
                _(b'node %s is not censored') % expectednode
            )
    except error.CensoredNodeError:
        # must pass the censored index flag to add censored revisions
        if not flags & REVIDX_ISCENSORED:
            raise
    return fulltext
549
549
550
550
@attr.s(slots=True, frozen=True)
class _deltainfo:
    """Immutable record describing a candidate delta for a revision."""

    # distance from base revision (bounds the amount of I/O needed)
    distance = attr.ib()
    # length of the delta itself
    deltalen = attr.ib()
    # (header, compressed-data) pair for the delta
    data = attr.ib()
    # revision number the delta applies against
    base = attr.ib()
    # first revision of the resulting delta chain
    chainbase = attr.ib()
    # number of deltas in the resulting chain
    chainlen = attr.ib()
    # cumulated compressed size of the whole chain
    compresseddeltalen = attr.ib()
    # snapshot depth if this delta creates an intermediate snapshot,
    # None otherwise
    snapshotdepth = attr.ib()
561
561
562
562
def drop_u_compression(delta):
    """turn into a "u" (no-compression) into no-compression without header

    This is useful for revlog format that has better compression method.

    The returned ``_deltainfo`` is identical except the leading b'u' marker
    byte is dropped from the data header and the delta length is reduced by
    one accordingly.
    """
    assert delta.data[0] == b'u', delta.data[0]
    return _deltainfo(
        delta.distance,
        delta.deltalen - 1,
        (b'', delta.data[1]),
        delta.base,
        delta.chainbase,
        delta.chainlen,
        delta.compresseddeltalen,
        delta.snapshotdepth,
    )
579
579
580
580
def is_good_delta_info(revlog, deltainfo, revinfo):
    """Returns True if the given delta is good. Good means that it is within
    the disk span, disk size, and chain length bounds that we know to be
    performant."""
    if deltainfo is None:
        return False

    # the higher-level instruction to reuse this exact base overrides every
    # heuristic below
    if (
        revinfo.cachedelta is not None
        and deltainfo.base == revinfo.cachedelta[0]
        and revinfo.cachedelta[2] == DELTA_BASE_REUSE_FORCE
    ):
        return True

    # - 'deltainfo.distance' is the distance from the base revision --
    #   bounding it limits the amount of I/O we need to do.
    # - 'deltainfo.compresseddeltalen' is the sum of the total size of
    #   deltas we need to apply -- bounding it limits the amount of CPU
    #   we consume.

    textlen = revinfo.textlen
    defaultmax = textlen * 4
    maxdist = revlog._maxdeltachainspan
    if not maxdist:
        maxdist = deltainfo.distance  # ensure the conditional pass
    maxdist = max(maxdist, defaultmax)

    # Bad delta from read span:
    #
    #   If the span of data read is larger than the maximum allowed.
    #
    #   In the sparse-revlog case, we rely on the associated "sparse reading"
    #   to avoid issue related to the span of data. In theory, it would be
    #   possible to build pathological revlog where delta pattern would lead
    #   to too many reads. However, they do not happen in practice at all. So
    #   we skip the span check entirely.
    if not revlog._sparserevlog and maxdist < deltainfo.distance:
        return False

    # Bad delta from new delta size:
    #
    #   If the delta size is larger than the target text, storing the
    #   delta will be inefficient.
    if textlen < deltainfo.deltalen:
        return False

    # Bad delta from cumulated payload size:
    #
    #   If the sum of delta get larger than K * target text length.
    if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
        return False

    # Bad delta from chain length:
    #
    #   If the number of delta in the chain gets too high.
    if revlog._maxchainlen and revlog._maxchainlen < deltainfo.chainlen:
        return False

    # bad delta from intermediate snapshot size limit
    #
    #   If an intermediate snapshot size is higher than the limit.  The
    #   limit exist to prevent endless chain of intermediate delta to be
    #   created.
    if (
        deltainfo.snapshotdepth is not None
        and (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen
    ):
        return False

    # bad delta if new intermediate snapshot is larger than the previous
    # snapshot
    if (
        deltainfo.snapshotdepth
        and revlog.length(deltainfo.base) < deltainfo.deltalen
    ):
        return False

    return True
659
659
660
660
# If a revision's full text is that much bigger than a base candidate full
# text's, it is very unlikely that it will produce a valid delta. We no longer
# consider these candidates.
LIMIT_BASE2TEXT = 500
665
665
666
666
def _candidategroups(
    revlog,
    textlen,
    p1,
    p2,
    cachedelta,
    excluded_bases=None,
    target_rev=None,
    snapshot_cache=None,
):
    """Provides group of revision to be tested as delta base

    This top level function focus on emitting groups with unique and worthwhile
    content. See _raw_candidate_groups for details about the group order.

    This is a generator driven through ``send()``: the caller sends back the
    revision it found "good" (or None) after testing each emitted group.
    """
    # should we try to build a delta?
    if not (len(revlog) and revlog._storedeltachains):
        yield None
        return

    if (
        cachedelta is not None
        and nullrev == cachedelta[0]
        and cachedelta[2] == DELTA_BASE_REUSE_FORCE
    ):
        # instructions are to forcibly do a full snapshot
        yield None
        return

    deltalength = revlog.length
    deltaparent = revlog.deltaparent
    sparse = revlog._sparserevlog
    good = None

    deltas_limit = textlen * LIMIT_DELTA2TEXT
    group_chunk_size = revlog._candidate_group_chunk_size

    tested = {nullrev}
    candidates = _refinedgroups(
        revlog,
        p1,
        p2,
        cachedelta,
        snapshot_cache=snapshot_cache,
    )
    while True:
        temptative = candidates.send(good)
        if temptative is None:
            break
        group = []
        for rev in temptative:
            # skip over empty delta (no need to include them in a chain)
            while revlog._generaldelta and not (
                rev == nullrev or rev in tested or deltalength(rev)
            ):
                tested.add(rev)
                rev = deltaparent(rev)
            # no need to try a delta against nullrev, this will be done as a
            # last resort.
            if rev == nullrev:
                continue
            # filter out revision we tested already
            if rev in tested:
                continue

            if (
                cachedelta is not None
                and rev == cachedelta[0]
                and cachedelta[2] == DELTA_BASE_REUSE_FORCE
            ):
                # instructions are to forcibly consider/use this delta base
                group.append(rev)
                continue

            # a higher authority deemed the base unworthy (e.g. censored)
            if excluded_bases is not None and rev in excluded_bases:
                tested.add(rev)
                continue
            # We are in some recomputation cases and that rev is too high in
            # the revlog
            if target_rev is not None and rev >= target_rev:
                tested.add(rev)
                continue
            # filter out delta base that will never produce good delta
            if deltas_limit < revlog.length(rev):
                tested.add(rev)
                continue
            if sparse and revlog.rawsize(rev) < (textlen // LIMIT_BASE2TEXT):
                tested.add(rev)
                continue
            # no delta for rawtext-changing revs (see "candelta" for why)
            if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
                tested.add(rev)
                continue

            # If we reach here, we are about to build and test a delta.
            # The delta building process will compute the chaininfo in all
            # case, since that computation is cached, it is fine to access it
            # here too.
            chainlen, chainsize = revlog._chaininfo(rev)
            # if chain will be too long, skip base
            if revlog._maxchainlen and chainlen >= revlog._maxchainlen:
                tested.add(rev)
                continue
            # if chain already have too much data, skip base
            if deltas_limit < chainsize:
                tested.add(rev)
                continue
            if sparse and revlog.upperboundcomp is not None:
                maxcomp = revlog.upperboundcomp
                basenotsnap = (p1, p2, nullrev)
                if rev not in basenotsnap and revlog.issnapshot(rev):
                    snapshotdepth = revlog.snapshotdepth(rev)
                    # If text is significantly larger than the base, we can
                    # expect the resulting delta to be proportional to the size
                    # difference
                    revsize = revlog.rawsize(rev)
                    rawsizedistance = max(textlen - revsize, 0)
                    # use an estimate of the compression upper bound.
                    lowestrealisticdeltalen = rawsizedistance // maxcomp

                    # check the absolute constraint on the delta size
                    snapshotlimit = textlen >> snapshotdepth
                    if snapshotlimit < lowestrealisticdeltalen:
                        # delta lower bound is larger than accepted upper bound
                        tested.add(rev)
                        continue

                    # check the relative constraint on the delta size
                    revlength = revlog.length(rev)
                    if revlength < lowestrealisticdeltalen:
                        # delta probable lower bound is larger than target base
                        tested.add(rev)
                        continue

            group.append(rev)
        if group:
            # When the size of the candidate group is big, it can result in a
            # quite significant performance impact. To reduce this, we can send
            # them in smaller batches until the new batch does not provide any
            # improvements.
            #
            # This might reduce the overall efficiency of the compression in
            # some corner cases, but that should also prevent very pathological
            # cases from being an issue. (eg. 20 000 candidates).
            #
            # XXX note that the ordering of the group becomes important as it
            # now impacts the final result. The current order is unprocessed
            # and can be improved.
            if group_chunk_size == 0:
                tested.update(group)
                good = yield tuple(group)
            else:
                prev_good = good
                for start in range(0, len(group), group_chunk_size):
                    sub_group = group[start : start + group_chunk_size]
                    tested.update(sub_group)
                    good = yield tuple(sub_group)
                    if prev_good == good:
                        break

    yield None
820
829
821
830
def _refinedgroups(revlog, p1, p2, cachedelta, snapshot_cache=None):
    """Yield candidate-base groups, refining around an accepted snapshot.

    Driven through ``send()`` like ``_candidategroups``: the caller sends
    back the revision it accepted (or None) after testing each group.
    """
    good = None
    # First we try to reuse the delta contained in the bundle.
    # (or from the source revlog)
    #
    # This logic only applies to general delta repositories and can be disabled
    # through configuration. Disabling reuse source delta is useful when
    # we want to make sure we recomputed "optimal" deltas.
    debug_info = None
    if cachedelta is not None and cachedelta[2] > DELTA_BASE_REUSE_NO:
        # Assume what we received from the server is a good choice
        # build delta will reuse the cache
        if debug_info is not None:
            debug_info['cached-delta.tested'] += 1
        good = yield (cachedelta[0],)
        if good is not None:
            if debug_info is not None:
                debug_info['cached-delta.accepted'] += 1
            yield None
            return
    if snapshot_cache is None:
        snapshot_cache = SnapshotCache()
    groups = _rawgroups(
        revlog,
        p1,
        p2,
        cachedelta,
        snapshot_cache,
    )
    for candidates in groups:
        good = yield candidates
        if good is not None:
            break

    # If sparse revlog is enabled, we can try to refine the available deltas
    if not revlog._sparserevlog:
        yield None
        return

    # if we have a refinable value, try to refine it
    if good is not None and good not in (p1, p2) and revlog.issnapshot(good):
        # refine snapshot down
        previous = None
        while previous != good:
            previous = good
            base = revlog.deltaparent(good)
            if base == nullrev:
                break
            good = yield (base,)
        # refine snapshot up
        if not snapshot_cache.snapshots:
            snapshot_cache.update(revlog, good + 1)
        previous = None
        while good != previous:
            previous = good
            children = tuple(sorted(c for c in snapshot_cache.snapshots[good]))
            good = yield children

    if debug_info is not None:
        if good is None:
            debug_info['no-solution'] += 1

    yield None
885
894
886
895
def _rawgroups(revlog, p1, p2, cachedelta, snapshot_cache=None):
    """Provides group of revision to be tested as delta base

    This lower level function focus on emitting delta theorically interresting
    without looking it any practical details.

    The group order aims at providing fast or small candidates first.
    """
    gdelta = revlog._generaldelta
    # gate sparse behind general-delta because of issue6056
    sparse = gdelta and revlog._sparserevlog
    curr = len(revlog)
    prev = curr - 1
    deltachain = lambda rev: revlog._deltachain(rev)[0]

    if gdelta:
        # exclude already lazy tested base if any
        parents = [p for p in (p1, p2) if p != nullrev]

        if not revlog._deltabothparents and len(parents) == 2:
            parents.sort()
            # To minimize the chance of having to build a fulltext,
            # pick first whichever parent is closest to us (max rev)
            yield (parents[1],)
            # then the other one (min rev) if the first did not fit
            yield (parents[0],)
        elif len(parents) > 0:
            # Test all parents (1 or 2), and keep the best candidate
            yield parents

    # NOTE: ``sparse`` implies ``gdelta``, so ``parents`` is always bound here
    if sparse and parents:
        if snapshot_cache is None:
            # map: base-rev: [snapshot-revs]
            snapshot_cache = SnapshotCache()
        # See if we can use an existing snapshot in the parent chains to use as
        # a base for a new intermediate-snapshot
        #
        # search for snapshot in parents delta chain
        # map: snapshot-level: snapshot-rev
        parents_snaps = collections.defaultdict(set)
        candidate_chains = [deltachain(p) for p in parents]
        for chain in candidate_chains:
            for idx, s in enumerate(chain):
                if not revlog.issnapshot(s):
                    break
                parents_snaps[idx].add(s)
        snapfloor = min(parents_snaps[0]) + 1
        snapshot_cache.update(revlog, snapfloor)
        # search for the highest "unrelated" revision
        #
        # Adding snapshots used by "unrelated" revision increase the odd we
        # reuse an independant, yet better snapshot chain.
        #
        # XXX instead of building a set of revisions, we could lazily enumerate
        # over the chains. That would be more efficient, however we stick to
        # simple code for now.
        all_revs = set()
        for chain in candidate_chains:
            all_revs.update(chain)
        other = None
        for r in revlog.revs(prev, snapfloor):
            if r not in all_revs:
                other = r
                break
        if other is not None:
            # To avoid unfair competition, we won't use unrelated intermediate
            # snapshot that are deeper than the ones from the parent delta
            # chain.
            max_depth = max(parents_snaps.keys())
            chain = deltachain(other)
            for depth, s in enumerate(chain):
                if s < snapfloor:
                    continue
                if max_depth < depth:
                    break
                if not revlog.issnapshot(s):
                    break
                parents_snaps[depth].add(s)
        # Test them as possible intermediate snapshot base
        # We test them from highest to lowest level. High level one are more
        # likely to result in small delta
        floor = None
        for idx, snaps in sorted(parents_snaps.items(), reverse=True):
            siblings = set()
            for s in snaps:
                siblings.update(snapshot_cache.snapshots[s])
            # Before considering making a new intermediate snapshot, we check
            # if an existing snapshot, children of base we consider, would be
            # suitable.
            #
            # It give a change to reuse a delta chain "unrelated" to the
            # current revision instead of starting our own. Without such
            # re-use, topological branches would keep reopening new chains.
            # Creating more and more snapshot as the repository grow.

            if floor is not None:
                # We only do this for siblings created after the one in our
                # parent's delta chain. Those created before has less chances
                # to be valid base since our ancestors had to create a new
                # snapshot.
                siblings = [r for r in siblings if floor < r]
            yield tuple(sorted(siblings))
            # then test the base from our parent's delta chain.
            yield tuple(sorted(snaps))
            floor = min(snaps)
        # No suitable base found in the parent chain, search if any full
        # snapshots emitted since parent's base would be a suitable base for an
        # intermediate snapshot.
        #
        # It give a chance to reuse a delta chain unrelated to the current
        # revisions instead of starting our own. Without such re-use,
        # topological branches would keep reopening new full chains. Creating
        # more and more snapshot as the repository grow.
        full = [r for r in snapshot_cache.snapshots[nullrev] if snapfloor <= r]
        yield tuple(sorted(full))

    if not sparse:
        # other approach failed try against prev to hopefully save us a
        # fulltext.
        yield (prev,)
1007
1016
1008
1017
class SnapshotCache:
    """Cache of which snapshot revisions are built on which bases.

    ``snapshots`` maps a base revision (``nullrev`` for full snapshots) to
    the set of snapshot revisions delta'ed against it.  The cache remembers
    the inclusive revision interval it has already scanned so later calls
    to ``update`` only walk the portions of the revlog not seen yet.
    """

    __slots__ = ('snapshots', '_start_rev', '_end_rev')

    def __init__(self):
        # base revision -> set of snapshot revisions using that base
        self.snapshots = collections.defaultdict(set)
        # inclusive bounds of the interval already scanned (None: nothing yet)
        self._start_rev = None
        self._end_rev = None

    def update(self, revlog, start_rev=0):
        """find snapshots from start_rev to tip"""
        tip = len(revlog) - 1
        if start_rev > tip:
            return  # range is empty

        if self._start_rev is None:
            # first scan ever: walk the whole requested range
            assert self._end_rev is None
            self._update(revlog, start_rev, tip)
        else:
            covered = self._start_rev <= start_rev and tip <= self._end_rev
            if not covered:
                # only walk the parts sticking out of the known interval
                if start_rev < self._start_rev:
                    self._update(revlog, start_rev, self._start_rev - 1)
                if self._end_rev < tip:
                    self._update(revlog, self._end_rev + 1, tip)

        if self._start_rev is None:
            assert self._end_rev is None
            self._end_rev = tip
            self._start_rev = start_rev
        else:
            self._start_rev = min(self._start_rev, start_rev)
            self._end_rev = max(self._end_rev, tip)
        assert self._start_rev <= self._end_rev, (
            self._start_rev,
            self._end_rev,
        )

    def _update(self, revlog, start_rev, end_rev):
        """internal method that actually do update content"""
        # callers must never re-scan revisions inside the known interval
        if self._start_rev is not None:
            start_outside = (
                start_rev < self._start_rev or start_rev > self._end_rev
            )
            assert start_outside, (
                self._start_rev,
                self._end_rev,
                start_rev,
                end_rev,
            )
            end_outside = end_rev < self._start_rev or end_rev > self._end_rev
            assert end_outside, (
                self._start_rev,
                self._end_rev,
                start_rev,
                end_rev,
            )
        cache = self.snapshots
        if util.safehasattr(revlog.index, b'findsnapshots'):
            # fast path: let the native index gather snapshots itself
            revlog.index.findsnapshots(cache, start_rev, end_rev)
        else:
            deltaparent = revlog.deltaparent
            issnapshot = revlog.issnapshot
            for rev in revlog.revs(start_rev, end_rev):
                if issnapshot(rev):
                    cache[deltaparent(rev)].add(rev)
1071
1063
1072
class deltacomputer:
    """Object in charge of computing deltas (or full snapshots) to store.

    Wraps a revlog and keeps a ``SnapshotCache`` alive across calls to
    ``finddeltainfo`` so that storing many revisions in a row does not
    re-scan the revlog for snapshots each time.  Optional debug hooks
    record or print information about the delta search.
    """

    def __init__(
        self,
        revlog,
        write_debug=None,
        debug_search=False,
        debug_info=None,
    ):
        # the revlog we compute deltas for
        self.revlog = revlog
        # optional callable receiving debug messages (bytes)
        self._write_debug = write_debug
        # when True (and write_debug is set) emit a detailed search trace
        self._debug_search = debug_search
        # optional list collecting one debug dict per processed revision
        self._debug_info = debug_info
        # snapshot cache shared by all finddeltainfo calls on this object
        self._snapshot_cache = SnapshotCache()

    def buildtext(self, revinfo, fh):
        """Builds a fulltext version of a revision

        revinfo: revisioninfo instance that contains all needed info
        fh: file handle to either the .i or the .d revlog file,
            depending on whether it is inlined or not
        """
        btext = revinfo.btext
        # btext is a one-element mutable cache; reuse a previous build
        if btext[0] is not None:
            return btext[0]

        revlog = self.revlog
        cachedelta = revinfo.cachedelta
        baserev = cachedelta[0]
        delta = cachedelta[1]

        # expand the cached delta into a fulltext and memoize it in btext
        fulltext = btext[0] = _textfromdelta(
            fh,
            revlog,
            baserev,
            delta,
            revinfo.p1,
            revinfo.p2,
            revinfo.flags,
            revinfo.node,
        )
        return fulltext

    def _builddeltadiff(self, base, revinfo, fh):
        """Return the raw (uncompressed) delta of revinfo against `base`."""
        revlog = self.revlog
        t = self.buildtext(revinfo, fh)
        if revlog.iscensored(base):
            # deltas based on a censored revision must replace the
            # full content in one patch, so delta works everywhere
            header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
            delta = header + t
        else:
            ptext = revlog.rawdata(base, _df=fh)
            delta = mdiff.textdiff(ptext, t)

        return delta

    def _builddeltainfo(self, revinfo, base, fh):
        """Build a _deltainfo describing revinfo delta'ed against `base`.

        Returns None when the delta is discarded early because its
        projected compressed size is too large for an intermediate
        snapshot at this depth.
        """
        # can we use the cached delta?
        revlog = self.revlog
        debug_search = self._write_debug is not None and self._debug_search
        chainbase = revlog.chainbase(base)
        if revlog._generaldelta:
            deltabase = base
        else:
            # without general delta, the base is always the previous
            # revision of the chain
            deltabase = chainbase
        snapshotdepth = None
        if revlog._sparserevlog and deltabase == nullrev:
            snapshotdepth = 0
        elif revlog._sparserevlog and revlog.issnapshot(deltabase):
            # A delta chain should always be one full snapshot,
            # zero or more semi-snapshots, and zero or more deltas
            p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
            if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
                snapshotdepth = len(revlog._deltachain(deltabase)[0])
        delta = None
        if revinfo.cachedelta:
            cachebase = revinfo.cachedelta[0]
            # check if the diff still apply
            # walk up empty (zero-length) revisions: they store no delta,
            # so a delta against them is really a delta against their base
            currentbase = cachebase
            while (
                currentbase != nullrev
                and currentbase != base
                and self.revlog.length(currentbase) == 0
            ):
                currentbase = self.revlog.deltaparent(currentbase)
            if self.revlog._lazydelta and currentbase == base:
                delta = revinfo.cachedelta[1]
        if delta is None:
            delta = self._builddeltadiff(base, revinfo, fh)
        if debug_search:
            msg = b"DBG-DELTAS-SEARCH: uncompressed-delta-size=%d\n"
            msg %= len(delta)
            self._write_debug(msg)
        # snapshotdepth needs to be neither None nor 0: this early
        # filtering only applies to intermediate snapshots (depth >= 1)
        if revlog.upperboundcomp is not None and snapshotdepth:
            lowestrealisticdeltalen = len(delta) // revlog.upperboundcomp
            snapshotlimit = revinfo.textlen >> snapshotdepth
            if debug_search:
                msg = b"DBG-DELTAS-SEARCH: projected-lower-size=%d\n"
                msg %= lowestrealisticdeltalen
                self._write_debug(msg)
            if snapshotlimit < lowestrealisticdeltalen:
                # even optimally compressed, this snapshot would be too big
                if debug_search:
                    msg = b"DBG-DELTAS-SEARCH: DISCARDED (snapshot limit)\n"
                    self._write_debug(msg)
                return None
            if revlog.length(base) < lowestrealisticdeltalen:
                if debug_search:
                    msg = b"DBG-DELTAS-SEARCH: DISCARDED (prev size)\n"
                    self._write_debug(msg)
                return None
        header, data = revlog.compress(delta)
        deltalen = len(header) + len(data)
        offset = revlog.end(len(revlog) - 1)
        # distance from the start of the delta chain base to the end of
        # this new delta, once appended
        dist = deltalen + offset - revlog.start(chainbase)
        chainlen, compresseddeltalen = revlog._chaininfo(base)
        chainlen += 1
        compresseddeltalen += deltalen

        return _deltainfo(
            dist,
            deltalen,
            (header, data),
            deltabase,
            chainbase,
            chainlen,
            compresseddeltalen,
            snapshotdepth,
        )

    def _fullsnapshotinfo(self, fh, revinfo, curr):
        """Build a _deltainfo storing the revision as a full snapshot.

        `curr` is the revision number the entry will be stored as; a full
        snapshot is its own delta and chain base (depth 0, chain length 1).
        """
        rawtext = self.buildtext(revinfo, fh)
        data = self.revlog.compress(rawtext)
        compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
        deltabase = chainbase = curr
        snapshotdepth = 0
        chainlen = 1

        return _deltainfo(
            dist,
            deltalen,
            data,
            deltabase,
            chainbase,
            chainlen,
            compresseddeltalen,
            snapshotdepth,
        )

    def finddeltainfo(self, revinfo, fh, excluded_bases=None, target_rev=None):
        """Find an acceptable delta against a candidate revision

        revinfo: information about the revision (instance of _revisioninfo)
        fh: file handle to either the .i or the .d revlog file,
            depending on whether it is inlined or not

        Returns the first acceptable candidate revision, as ordered by
        _candidategroups

        If no suitable deltabase is found, we return delta info for a full
        snapshot.

        `excluded_bases` is an optional set of revision that cannot be used as
        a delta base. Use this to recompute delta suitable in censor or strip
        context.
        """
        if target_rev is None:
            target_rev = len(self.revlog)

        # empty text is always stored as a full snapshot
        if not revinfo.textlen:
            return self._fullsnapshotinfo(fh, revinfo, target_rev)

        if excluded_bases is None:
            excluded_bases = set()

        # no delta for flag processor revision (see "candelta" for why)
        # not calling candelta since only one revision needs test, also to
        # avoid overhead fetching flags again.
        if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
            return self._fullsnapshotinfo(fh, revinfo, target_rev)

        gather_debug = (
            self._write_debug is not None or self._debug_info is not None
        )
        debug_search = self._write_debug is not None and self._debug_search

        if gather_debug:
            start = util.timer()

        # count the number of different delta we tried (for debug purpose)
        dbg_try_count = 0
        # count the number of "search round" we did. (for debug purpose)
        dbg_try_rounds = 0
        dbg_type = b'unknown'

        cachedelta = revinfo.cachedelta
        p1 = revinfo.p1
        p2 = revinfo.p2
        revlog = self.revlog

        deltainfo = None
        p1r, p2r = revlog.rev(p1), revlog.rev(p2)

        if gather_debug:
            if p1r != nullrev:
                p1_chain_len = revlog._chaininfo(p1r)[0]
            else:
                p1_chain_len = -1
            if p2r != nullrev:
                p2_chain_len = revlog._chaininfo(p2r)[0]
            else:
                p2_chain_len = -1
        if debug_search:
            msg = b"DBG-DELTAS-SEARCH: SEARCH rev=%d\n"
            msg %= target_rev
            self._write_debug(msg)

        # `groups` is a generator of candidate-base groups; after each
        # round we `send` it the best base found so far so it can refine
        groups = _candidategroups(
            self.revlog,
            revinfo.textlen,
            p1r,
            p2r,
            cachedelta,
            excluded_bases,
            target_rev,
            snapshot_cache=self._snapshot_cache,
        )
        candidaterevs = next(groups)
        while candidaterevs is not None:
            dbg_try_rounds += 1
            if debug_search:
                # classify this round for the debug trace only
                prev = None
                if deltainfo is not None:
                    prev = deltainfo.base

                if (
                    cachedelta is not None
                    and len(candidaterevs) == 1
                    and cachedelta[0] in candidaterevs
                ):
                    round_type = b"cached-delta"
                elif p1 in candidaterevs or p2 in candidaterevs:
                    round_type = b"parents"
                elif prev is not None and all(c < prev for c in candidaterevs):
                    round_type = b"refine-down"
                elif prev is not None and all(c > prev for c in candidaterevs):
                    round_type = b"refine-up"
                else:
                    round_type = b"search-down"
                msg = b"DBG-DELTAS-SEARCH: ROUND #%d - %d candidates - %s\n"
                msg %= (dbg_try_rounds, len(candidaterevs), round_type)
                self._write_debug(msg)
            nominateddeltas = []
            if deltainfo is not None:
                if debug_search:
                    msg = (
                        b"DBG-DELTAS-SEARCH: CONTENDER: rev=%d - length=%d\n"
                    )
                    msg %= (deltainfo.base, deltainfo.deltalen)
                    self._write_debug(msg)
                # if we already found a good delta,
                # challenge it against refined candidates
                nominateddeltas.append(deltainfo)
            for candidaterev in candidaterevs:
                if debug_search:
                    msg = b"DBG-DELTAS-SEARCH: CANDIDATE: rev=%d\n"
                    msg %= candidaterev
                    self._write_debug(msg)
                    candidate_type = None
                    if candidaterev == p1:
                        candidate_type = b"p1"
                    elif candidaterev == p2:
                        candidate_type = b"p2"
                    elif self.revlog.issnapshot(candidaterev):
                        candidate_type = b"snapshot-%d"
                        candidate_type %= self.revlog.snapshotdepth(
                            candidaterev
                        )

                    if candidate_type is not None:
                        msg = b"DBG-DELTAS-SEARCH: type=%s\n"
                        msg %= candidate_type
                        self._write_debug(msg)
                    msg = b"DBG-DELTAS-SEARCH: size=%d\n"
                    msg %= self.revlog.length(candidaterev)
                    self._write_debug(msg)
                    msg = b"DBG-DELTAS-SEARCH: base=%d\n"
                    msg %= self.revlog.deltaparent(candidaterev)
                    self._write_debug(msg)

                dbg_try_count += 1

                if debug_search:
                    delta_start = util.timer()
                candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
                if debug_search:
                    delta_end = util.timer()
                    msg = b"DBG-DELTAS-SEARCH: delta-search-time=%f\n"
                    msg %= delta_end - delta_start
                    self._write_debug(msg)
                if candidatedelta is not None:
                    if is_good_delta_info(self.revlog, candidatedelta, revinfo):
                        if debug_search:
                            msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (GOOD)\n"
                            msg %= candidatedelta.deltalen
                            self._write_debug(msg)
                        nominateddeltas.append(candidatedelta)
                    elif debug_search:
                        msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (BAD)\n"
                        msg %= candidatedelta.deltalen
                        self._write_debug(msg)
                elif debug_search:
                    msg = b"DBG-DELTAS-SEARCH: NO-DELTA\n"
                    self._write_debug(msg)
            if nominateddeltas:
                # keep the smallest delta found this round
                deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
            if deltainfo is not None:
                candidaterevs = groups.send(deltainfo.base)
            else:
                candidaterevs = next(groups)

        if deltainfo is None:
            dbg_type = b"full"
            deltainfo = self._fullsnapshotinfo(fh, revinfo, target_rev)
        elif deltainfo.snapshotdepth:  # pytype: disable=attribute-error
            dbg_type = b"snapshot"
        else:
            dbg_type = b"delta"

        if gather_debug:
            end = util.timer()
            if dbg_type == b'full':
                used_cached = (
                    cachedelta is not None
                    and dbg_try_rounds == 0
                    and dbg_try_count == 0
                    and cachedelta[0] == nullrev
                )
            else:
                used_cached = (
                    cachedelta is not None
                    and dbg_try_rounds == 1
                    and dbg_try_count == 1
                    and deltainfo.base == cachedelta[0]
                )
            dbg = {
                'duration': end - start,
                'revision': target_rev,
                'delta-base': deltainfo.base,  # pytype: disable=attribute-error
                'search_round_count': dbg_try_rounds,
                'using-cached-base': used_cached,
                'delta_try_count': dbg_try_count,
                'type': dbg_type,
                'p1-chain-len': p1_chain_len,
                'p2-chain-len': p2_chain_len,
            }
            if (
                deltainfo.snapshotdepth  # pytype: disable=attribute-error
                is not None
            ):
                dbg[
                    'snapshot-depth'
                ] = deltainfo.snapshotdepth  # pytype: disable=attribute-error
            else:
                dbg['snapshot-depth'] = 0
            target_revlog = b"UNKNOWN"
            target_type = self.revlog.target[0]
            target_key = self.revlog.target[1]
            if target_type == KIND_CHANGELOG:
                target_revlog = b'CHANGELOG:'
            elif target_type == KIND_MANIFESTLOG:
                target_revlog = b'MANIFESTLOG:'
                if target_key:
                    target_revlog += b'%s:' % target_key
            elif target_type == KIND_FILELOG:
                target_revlog = b'FILELOG:'
                if target_key:
                    target_revlog += b'%s:' % target_key
            dbg['target-revlog'] = target_revlog

            if self._debug_info is not None:
                self._debug_info.append(dbg)

            if self._write_debug is not None:
                msg = (
                    b"DBG-DELTAS:"
                    b" %-12s"
                    b" rev=%d:"
                    b" delta-base=%d"
                    b" is-cached=%d"
                    b" - search-rounds=%d"
                    b" try-count=%d"
                    b" - delta-type=%-6s"
                    b" snap-depth=%d"
                    b" - p1-chain-length=%d"
                    b" p2-chain-length=%d"
                    b" - duration=%f"
                    b"\n"
                )
                msg %= (
                    dbg["target-revlog"],
                    dbg["revision"],
                    dbg["delta-base"],
                    dbg["using-cached-base"],
                    dbg["search_round_count"],
                    dbg["delta_try_count"],
                    dbg["type"],
                    dbg["snapshot-depth"],
                    dbg["p1-chain-len"],
                    dbg["p2-chain-len"],
                    dbg["duration"],
                )
                self._write_debug(msg)
        return deltainfo
1486 return deltainfo
1478
1487
1479
1488
def delta_compression(default_compression_header, deltainfo):
    """return (COMPRESSION_MODE, deltainfo)

    used by revlog v2+ format to dispatch between PLAIN and DEFAULT
    compression.
    """
    header, payload = deltainfo.data
    if not header and not payload:
        # not data to store at all... declare them uncompressed
        return COMP_MODE_PLAIN, deltainfo
    if not header:
        # no compression header: peek at the first payload byte to
        # recognize how the data was (or was not) compressed
        marker = payload[0:1]
        if marker == b'\0':
            return COMP_MODE_PLAIN, deltainfo
        if marker == default_compression_header:
            return COMP_MODE_DEFAULT, deltainfo
        return COMP_MODE_INLINE, deltainfo
    if header == b'u':
        # we have a more efficient way to declare uncompressed
        return COMP_MODE_PLAIN, drop_u_compression(deltainfo)
    return COMP_MODE_INLINE, deltainfo
1511 return compression_mode, deltainfo
General Comments 0
You need to be logged in to leave comments. Login now