##// END OF EJS Templates
delta-find: use the `_debug_search` attribute directly...
marmoute -
r51542:c84cc0ac stable
parent child Browse files
Show More
@@ -1,1532 +1,1533 b''
1 # revlogdeltas.py - Logic around delta computation for revlog
1 # revlogdeltas.py - Logic around delta computation for revlog
2 #
2 #
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 # Copyright 2018 Octobus <contact@octobus.net>
4 # Copyright 2018 Octobus <contact@octobus.net>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8 """Helper class to compute deltas stored inside revlogs"""
8 """Helper class to compute deltas stored inside revlogs"""
9
9
10
10
11 import collections
11 import collections
12 import struct
12 import struct
13
13
14 # import stuff from node for others to import from revlog
14 # import stuff from node for others to import from revlog
15 from ..node import nullrev
15 from ..node import nullrev
16 from ..i18n import _
16 from ..i18n import _
17 from ..pycompat import getattr
17 from ..pycompat import getattr
18
18
19 from .constants import (
19 from .constants import (
20 COMP_MODE_DEFAULT,
20 COMP_MODE_DEFAULT,
21 COMP_MODE_INLINE,
21 COMP_MODE_INLINE,
22 COMP_MODE_PLAIN,
22 COMP_MODE_PLAIN,
23 DELTA_BASE_REUSE_FORCE,
23 DELTA_BASE_REUSE_FORCE,
24 DELTA_BASE_REUSE_NO,
24 DELTA_BASE_REUSE_NO,
25 KIND_CHANGELOG,
25 KIND_CHANGELOG,
26 KIND_FILELOG,
26 KIND_FILELOG,
27 KIND_MANIFESTLOG,
27 KIND_MANIFESTLOG,
28 REVIDX_ISCENSORED,
28 REVIDX_ISCENSORED,
29 REVIDX_RAWTEXT_CHANGING_FLAGS,
29 REVIDX_RAWTEXT_CHANGING_FLAGS,
30 )
30 )
31
31
32 from ..thirdparty import attr
32 from ..thirdparty import attr
33
33
34 from .. import (
34 from .. import (
35 error,
35 error,
36 mdiff,
36 mdiff,
37 util,
37 util,
38 )
38 )
39
39
40 from . import flagutil
40 from . import flagutil
41
41
42 # maximum <delta-chain-data>/<revision-text-length> ratio
42 # maximum <delta-chain-data>/<revision-text-length> ratio
43 LIMIT_DELTA2TEXT = 2
43 LIMIT_DELTA2TEXT = 2
44
44
45
45
class _testrevlog:
    """minimalist fake revlog to use in doctests"""

    def __init__(self, data, density=0.5, mingap=0, snapshot=()):
        """data is an list of revision payload boundaries"""
        # cumulative end offsets: revision i spans data[i-1]..data[i]
        self._data = data
        self._srdensitythreshold = density
        self._srmingapsize = mingap
        self._snapshot = set(snapshot)
        self.index = None

    def start(self, rev):
        # both nullrev and the first revision start at offset zero
        if rev in (nullrev, 0):
            return 0
        return self._data[rev - 1]

    def end(self, rev):
        if rev == nullrev:
            return 0
        return self._data[rev]

    def length(self, rev):
        # payload size of `rev` is the distance between its boundaries
        return self.end(rev) - self.start(rev)

    def __len__(self):
        return len(self._data)

    def issnapshot(self, rev):
        # nullrev is always considered a snapshot
        return rev == nullrev or rev in self._snapshot
79
79
80
80
def slicechunk(revlog, revs, targetsize=None):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `revlog._srdensitythreshold`. No gap smaller than
    `revlog._srmingapsize` is skipped.

    If `targetsize` is set, no chunk larger than `targetsize` will be yield.
    For consistency with other slicing choice, this limit won't go lower than
    `revlog._srmingapsize`.

    If individual revisions chunk are larger than this limit, they will still
    be raised individually.

    >>> data = [
    ...  5, #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ]
    >>> revlog = _testrevlog(data, snapshot=range(16))

    >>> list(slicechunk(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(slicechunk(revlog, [0, 15]))
    [[0], [15]]
    >>> list(slicechunk(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]

    Slicing with a maximum chunk size
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
    [[0], [11], [13], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
    [[0], [11], [13, 15]]

    Slicing involving nullrev
    >>> list(slicechunk(revlog, [-1, 0, 11, 13, 15], targetsize=20))
    [[-1, 0], [11], [13, 15]]
    >>> list(slicechunk(revlog, [-1, 13, 15], targetsize=5))
    [[-1], [13], [15]]
    """
    if targetsize is not None:
        # never enforce a size limit smaller than the minimum gap size
        targetsize = max(targetsize, revlog._srmingapsize)
    # targetsize should not be specified when evaluating delta candidates:
    # * targetsize is used to ensure we stay within specification when reading,
    # the index object may provide its own density slicing; fall back to the
    # pure Python implementation otherwise
    densityslicing = getattr(revlog.index, 'slicechunktodensity', None)
    if densityslicing is None:
        densityslicing = lambda x, y, z: _slicechunktodensity(revlog, x, y, z)
    # first slice by density, then re-slice each chunk by size
    for chunk in densityslicing(
        revs, revlog._srdensitythreshold, revlog._srmingapsize
    ):
        for subchunk in _slicechunktosize(revlog, chunk, targetsize):
            yield subchunk
153
153
154
154
def _slicechunktosize(revlog, revs, targetsize=None):
    """slice revs to match the target size

    This is intended to be used on chunk that density slicing selected by that
    are still too large compared to the read garantee of revlog. This might
    happens when "minimal gap size" interrupted the slicing or when chain are
    built in a way that create large blocks next to each other.

    >>> data = [
    ...  3, #0 (3)
    ...  5, #1 (2)
    ...  6, #2 (1)
    ...  8, #3 (2)
    ...  8, #4 (empty)
    ...  11, #5 (3)
    ...  12, #6 (1)
    ...  13, #7 (1)
    ...  14, #8 (1)
    ... ]

    == All snapshots cases ==
    >>> revlog = _testrevlog(data, snapshot=range(9))

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1, 2], [3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]

    == No Snapshot cases ==
    >>> revlog = _testrevlog(data)

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1], [2, 3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [4, 5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]

    == mixed case ==
    >>> revlog = _testrevlog(data, snapshot=[0, 1, 2])
    >>> list(_slicechunktosize(revlog, list(range(9)), 5))
    [[0, 1], [2], [3, 4, 5], [6, 7, 8]]
    """
    assert targetsize is None or 0 <= targetsize
    startdata = revlog.start(revs[0])
    enddata = revlog.end(revs[-1])
    fullspan = enddata - startdata
    # the whole span already fits within the limit: nothing to slice
    if targetsize is None or fullspan <= targetsize:
        yield revs
        return

    startrevidx = 0
    endrevidx = 1
    iterrevs = enumerate(revs)
    next(iterrevs)  # skip first rev.
    # first step: get snapshots out of the way
    for idx, r in iterrevs:
        span = revlog.end(r) - startdata
        snapshot = revlog.issnapshot(r)
        if span <= targetsize and snapshot:
            # snapshot still within the limit: extend the current chunk
            endrevidx = idx + 1
        else:
            # flush the accumulated chunk and start a new one at `r`
            chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
            if chunk:
                yield chunk
            startrevidx = idx
            startdata = revlog.start(r)
            endrevidx = idx + 1
        if not snapshot:
            # a non-snapshot revision ends the snapshot-handling phase
            break

    # for the others, we use binary slicing to quickly converge toward valid
    # chunks (otherwise, we might end up looking for start/end of many
    # revisions). This logic is not looking for the perfect slicing point, it
    # focuses on quickly converging toward valid chunks.
    nbitem = len(revs)
    while (enddata - startdata) > targetsize:
        endrevidx = nbitem
        if nbitem - startrevidx <= 1:
            break  # protect against individual chunk larger than limit
        localenddata = revlog.end(revs[endrevidx - 1])
        span = localenddata - startdata
        # halve the candidate range until the span fits (or a single
        # over-sized revision remains)
        while span > targetsize:
            if endrevidx - startrevidx <= 1:
                break  # protect against individual chunk larger than limit
            endrevidx -= (endrevidx - startrevidx) // 2
            localenddata = revlog.end(revs[endrevidx - 1])
            span = localenddata - startdata
        chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
        if chunk:
            yield chunk
        startrevidx = endrevidx
        startdata = revlog.start(revs[startrevidx])

    # emit whatever remains at the tail
    chunk = _trimchunk(revlog, revs, startrevidx)
    if chunk:
        yield chunk
308
308
309
309
def _slicechunktodensity(revlog, revs, targetdensity=0.5, mingapsize=0):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
    skipped.

    >>> revlog = _testrevlog([
    ...  5, #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ])

    >>> list(_slicechunktodensity(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(_slicechunktodensity(revlog, [0, 15]))
    [[0], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           mingapsize=20))
    [[1, 2, 3, 5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95))
    [[1, 2], [5], [8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95, mingapsize=12))
    [[1, 2], [5, 8, 10, 11], [14]]
    """
    start = revlog.start
    length = revlog.length

    # zero or one revision: nothing to slice
    if len(revs) <= 1:
        yield revs
        return

    deltachainspan = segmentspan(revlog, revs)

    # span smaller than the minimum gap size: not worth slicing
    if deltachainspan < mingapsize:
        yield revs
        return

    readdata = deltachainspan
    chainpayload = sum(length(r) for r in revs)

    if deltachainspan:
        density = chainpayload / float(deltachainspan)
    else:
        density = 1.0

    # density already acceptable: keep the chunk whole
    if density >= targetdensity:
        yield revs
        return

    # Collect qualifying gaps as (size, index) pairs in a plain list; sorting
    # it ascending lets us pop the largest gap from the end.
    gaps = []
    prevend = None
    for i, rev in enumerate(revs):
        revstart = start(rev)
        revlen = length(rev)

        # Skip empty revisions to form larger holes
        if revlen == 0:
            continue

        if prevend is not None:
            gapsize = revstart - prevend
            # only consider holes that are large enough
            if gapsize > mingapsize:
                gaps.append((gapsize, i))

        prevend = revstart + revlen
    # sort the gaps to pop them from largest to small
    gaps.sort()

    # Collect the indices of the largest holes until the density is acceptable
    selected = []
    while gaps and density < targetdensity:
        gapsize, gapidx = gaps.pop()

        selected.append(gapidx)

        # skipping this gap reduces the amount of data actually read, which
        # improves the effective density
        readdata -= gapsize
        if readdata > 0:
            density = chainpayload / float(readdata)
        else:
            density = 1.0
    selected.sort()

    # Cut the revs at collected indices
    previdx = 0
    for idx in selected:

        chunk = _trimchunk(revlog, revs, previdx, idx)
        if chunk:
            yield chunk

        previdx = idx

    chunk = _trimchunk(revlog, revs, previdx)
    if chunk:
        yield chunk
434
434
435
435
436 def _trimchunk(revlog, revs, startidx, endidx=None):
436 def _trimchunk(revlog, revs, startidx, endidx=None):
437 """returns revs[startidx:endidx] without empty trailing revs
437 """returns revs[startidx:endidx] without empty trailing revs
438
438
439 Doctest Setup
439 Doctest Setup
440 >>> revlog = _testrevlog([
440 >>> revlog = _testrevlog([
441 ... 5, #0
441 ... 5, #0
442 ... 10, #1
442 ... 10, #1
443 ... 12, #2
443 ... 12, #2
444 ... 12, #3 (empty)
444 ... 12, #3 (empty)
445 ... 17, #4
445 ... 17, #4
446 ... 21, #5
446 ... 21, #5
447 ... 21, #6 (empty)
447 ... 21, #6 (empty)
448 ... ])
448 ... ])
449
449
450 Contiguous cases:
450 Contiguous cases:
451 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
451 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
452 [0, 1, 2, 3, 4, 5]
452 [0, 1, 2, 3, 4, 5]
453 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
453 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
454 [0, 1, 2, 3, 4]
454 [0, 1, 2, 3, 4]
455 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
455 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
456 [0, 1, 2]
456 [0, 1, 2]
457 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
457 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
458 [2]
458 [2]
459 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
459 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
460 [3, 4, 5]
460 [3, 4, 5]
461 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
461 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
462 [3, 4]
462 [3, 4]
463
463
464 Discontiguous cases:
464 Discontiguous cases:
465 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
465 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
466 [1, 3, 5]
466 [1, 3, 5]
467 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
467 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
468 [1]
468 [1]
469 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
469 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
470 [3, 5]
470 [3, 5]
471 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
471 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
472 [3, 5]
472 [3, 5]
473 """
473 """
474 length = revlog.length
474 length = revlog.length
475
475
476 if endidx is None:
476 if endidx is None:
477 endidx = len(revs)
477 endidx = len(revs)
478
478
479 # If we have a non-emtpy delta candidate, there are nothing to trim
479 # If we have a non-emtpy delta candidate, there are nothing to trim
480 if revs[endidx - 1] < len(revlog):
480 if revs[endidx - 1] < len(revlog):
481 # Trim empty revs at the end, except the very first revision of a chain
481 # Trim empty revs at the end, except the very first revision of a chain
482 while (
482 while (
483 endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0
483 endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0
484 ):
484 ):
485 endidx -= 1
485 endidx -= 1
486
486
487 return revs[startidx:endidx]
487 return revs[startidx:endidx]
488
488
489
489
def segmentspan(revlog, revs):
    """Get the byte span of a segment of revisions

    revs is a sorted array of revision numbers

    >>> revlog = _testrevlog([
    ...  5, #0
    ...  10, #1
    ...  12, #2
    ...  12, #3 (empty)
    ...  17, #4
    ... ])

    >>> segmentspan(revlog, [0, 1, 2, 3, 4])
    17
    >>> segmentspan(revlog, [0, 4])
    17
    >>> segmentspan(revlog, [3, 4])
    5
    >>> segmentspan(revlog, [1, 2, 3,])
    7
    >>> segmentspan(revlog, [1, 3])
    7
    """
    # an empty segment covers no bytes at all
    if not revs:
        return 0
    first, last = revs[0], revs[-1]
    # span from the start of the first revision to the end of the last one,
    # gaps included
    return revlog.end(last) - revlog.start(first)
518
518
519
519
def _textfromdelta(fh, revlog, baserev, delta, p1, p2, flags, expectednode):
    """build full text from a (base, delta) pair and other metadata"""
    # special case deltas which replace entire base; no need to decode
    # base revision. this neatly avoids censored bases, which throw when
    # they're decoded.
    hlen = struct.calcsize(b">lll")
    if delta[:hlen] == mdiff.replacediffheader(
        revlog.rawsize(baserev), len(delta) - hlen
    ):
        # full-replacement delta: the payload after the header IS the text
        fulltext = delta[hlen:]
    else:
        # deltabase is rawtext before changed by flag processors, which is
        # equivalent to non-raw text
        basetext = revlog.revision(baserev, _df=fh)
        fulltext = mdiff.patch(basetext, delta)

    try:
        validatehash = flagutil.processflagsraw(revlog, fulltext, flags)
        if validatehash:
            revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
        # a censored flag on a revision whose hash checked out is a
        # contradiction: the content is clearly not censored
        if flags & REVIDX_ISCENSORED:
            raise error.StorageError(
                _(b'node %s is not censored') % expectednode
            )
    except error.CensoredNodeError:
        # must pass the censored index flag to add censored revisions
        if not flags & REVIDX_ISCENSORED:
            raise
    return fulltext
549
549
550
550
@attr.s(slots=True, frozen=True)
class _deltainfo:
    """Immutable description of a computed candidate delta."""

    # distance from the delta-base revision (bounds the on-disk read span)
    distance = attr.ib()
    # size in bytes of this delta alone
    deltalen = attr.ib()
    # the delta payload (header, data) pair
    data = attr.ib()
    # revision number this delta applies against
    base = attr.ib()
    # first revision of the resulting delta chain
    chainbase = attr.ib()
    # number of deltas in the resulting chain
    chainlen = attr.ib()
    # cumulated compressed size of all deltas in the chain
    compresseddeltalen = attr.ib()
    # snapshot depth if this would be an intermediate snapshot, else None
    snapshotdepth = attr.ib()
561
561
562
562
def drop_u_compression(delta):
    """turn into a "u" (no-compression) into no-compression without header

    This is useful for revlog format that has better compression method.
    """
    # a "u" header marks an explicitly-uncompressed payload; strip it and
    # account for the one byte saved in ``deltalen``.
    header, payload = delta.data
    assert header == b'u', header
    return _deltainfo(
        distance=delta.distance,
        deltalen=delta.deltalen - 1,
        data=(b'', payload),
        base=delta.base,
        chainbase=delta.chainbase,
        chainlen=delta.chainlen,
        compresseddeltalen=delta.compresseddeltalen,
        snapshotdepth=delta.snapshotdepth,
    )
579
579
580
580
def is_good_delta_info(revlog, deltainfo, revinfo):
    """Returns True if the given delta is good. Good means that it is within
    the disk span, disk size, and chain length bounds that we know to be
    performant.

    ``deltainfo`` is a ``_deltainfo`` (or None for "no delta found");
    ``revinfo`` carries the target revision's metadata (``textlen``,
    ``cachedelta``).
    """
    if deltainfo is None:
        return False

    # A forced delta-base reuse (DELTA_BASE_REUSE_FORCE in the cached delta)
    # short-circuits every quality check below.
    if (
        revinfo.cachedelta is not None
        and deltainfo.base == revinfo.cachedelta[0]
        and revinfo.cachedelta[2] == DELTA_BASE_REUSE_FORCE
    ):
        return True

    # - 'deltainfo.distance' is the distance from the base revision --
    #   bounding it limits the amount of I/O we need to do.
    # - 'deltainfo.compresseddeltalen' is the sum of the total size of
    #   deltas we need to apply -- bounding it limits the amount of CPU
    #   we consume.

    textlen = revinfo.textlen
    defaultmax = textlen * 4
    maxdist = revlog._maxdeltachainspan
    if not maxdist:
        maxdist = deltainfo.distance  # ensure the conditional pass
    maxdist = max(maxdist, defaultmax)

    # Bad delta from read span:
    #
    #   If the span of data read is larger than the maximum allowed.
    #
    #   In the sparse-revlog case, we rely on the associated "sparse reading"
    #   to avoid issue related to the span of data. In theory, it would be
    #   possible to build pathological revlog where delta pattern would lead
    #   to too many reads. However, they do not happen in practice at all. So
    #   we skip the span check entirely.
    if not revlog._sparserevlog and maxdist < deltainfo.distance:
        return False

    # Bad delta from new delta size:
    #
    #   If the delta size is larger than the target text, storing the
    #   delta will be inefficient.
    if textlen < deltainfo.deltalen:
        return False

    # Bad delta from cumulated payload size:
    #
    #   If the sum of delta get larger than K * target text length.
    if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
        return False

    # Bad delta from chain length:
    #
    #   If the number of delta in the chain gets too high.
    if revlog._maxchainlen and revlog._maxchainlen < deltainfo.chainlen:
        return False

    # bad delta from intermediate snapshot size limit
    #
    #   If an intermediate snapshot size is higher than the limit.  The
    #   limit exist to prevent endless chain of intermediate delta to be
    #   created.
    if (
        deltainfo.snapshotdepth is not None
        and (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen
    ):
        return False

    # bad delta if new intermediate snapshot is larger than the previous
    # snapshot
    if (
        deltainfo.snapshotdepth
        and revlog.length(deltainfo.base) < deltainfo.deltalen
    ):
        return False

    return True
659
659
660
660
# If a revision's full text is that much bigger than a base candidate full
# text's, it is very unlikely that it will produce a valid delta. We no longer
# consider these candidates.
LIMIT_BASE2TEXT = 500
665
665
666
666
def _candidategroups(
    revlog,
    textlen,
    p1,
    p2,
    cachedelta,
    excluded_bases=None,
    target_rev=None,
    snapshot_cache=None,
):
    """Provides group of revision to be tested as delta base

    This top level function focus on emitting groups with unique and worthwhile
    content. See _raw_candidate_groups for details about the group order.

    This is a coroutine-style generator: the caller sends back the "good"
    revision found in the previous group (or None) via ``send()``, which
    drives the refinement done by ``_refinedgroups``.  A final ``None`` is
    always yielded to signal exhaustion.
    """
    # should we try to build a delta?
    if not (len(revlog) and revlog._storedeltachains):
        yield None
        return

    if target_rev is None:
        target_rev = len(revlog)

    if not revlog._generaldelta:
        # before general delta, there is only one possible delta base
        yield (target_rev - 1,)
        yield None
        return

    if (
        cachedelta is not None
        and nullrev == cachedelta[0]
        and cachedelta[2] == DELTA_BASE_REUSE_FORCE
    ):
        # instruction are to forcibly do a full snapshot
        yield None
        return

    deltalength = revlog.length
    deltaparent = revlog.deltaparent
    sparse = revlog._sparserevlog
    good = None

    deltas_limit = textlen * LIMIT_DELTA2TEXT
    group_chunk_size = revlog._candidate_group_chunk_size

    tested = {nullrev}
    candidates = _refinedgroups(
        revlog,
        p1,
        p2,
        cachedelta,
        snapshot_cache=snapshot_cache,
    )
    while True:
        temptative = candidates.send(good)
        if temptative is None:
            break
        group = []
        for rev in temptative:
            # skip over empty delta (no need to include them in a chain)
            while not (rev == nullrev or rev in tested or deltalength(rev)):
                tested.add(rev)
                rev = deltaparent(rev)
            # no need to try a delta against nullrev, this will be done as a
            # last resort.
            if rev == nullrev:
                continue
            # filter out revision we tested already
            if rev in tested:
                continue

            if (
                cachedelta is not None
                and rev == cachedelta[0]
                and cachedelta[2] == DELTA_BASE_REUSE_FORCE
            ):
                # instructions are to forcibly consider/use this delta base
                group.append(rev)
                continue

            # an higher authority deamed the base unworthy (e.g. censored)
            if excluded_bases is not None and rev in excluded_bases:
                tested.add(rev)
                continue
            # We are in some recomputation cases and that rev is too high in
            # the revlog
            if target_rev is not None and rev >= target_rev:
                tested.add(rev)
                continue
            # filter out delta base that will never produce good delta
            if deltas_limit < revlog.length(rev):
                tested.add(rev)
                continue
            if sparse and revlog.rawsize(rev) < (textlen // LIMIT_BASE2TEXT):
                tested.add(rev)
                continue
            # no delta for rawtext-changing revs (see "candelta" for why)
            if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
                tested.add(rev)
                continue

            # If we reach here, we are about to build and test a delta.
            # The delta building process will compute the chaininfo in all
            # case, since that computation is cached, it is fine to access it
            # here too.
            chainlen, chainsize = revlog._chaininfo(rev)
            # if chain will be too long, skip base
            if revlog._maxchainlen and chainlen >= revlog._maxchainlen:
                tested.add(rev)
                continue
            # if chain already have too much data, skip base
            if deltas_limit < chainsize:
                tested.add(rev)
                continue
            if sparse and revlog.upperboundcomp is not None:
                maxcomp = revlog.upperboundcomp
                basenotsnap = (p1, p2, nullrev)
                if rev not in basenotsnap and revlog.issnapshot(rev):
                    snapshotdepth = revlog.snapshotdepth(rev)
                    # If text is significantly larger than the base, we can
                    # expect the resulting delta to be proportional to the size
                    # difference
                    revsize = revlog.rawsize(rev)
                    rawsizedistance = max(textlen - revsize, 0)
                    # use an estimate of the compression upper bound.
                    lowestrealisticdeltalen = rawsizedistance // maxcomp

                    # check the absolute constraint on the delta size
                    snapshotlimit = textlen >> snapshotdepth
                    if snapshotlimit < lowestrealisticdeltalen:
                        # delta lower bound is larger than accepted upper bound
                        tested.add(rev)
                        continue

                    # check the relative constraint on the delta size
                    revlength = revlog.length(rev)
                    if revlength < lowestrealisticdeltalen:
                        # delta probable lower bound is larger than target base
                        tested.add(rev)
                        continue

            group.append(rev)
        if group:
            # When the size of the candidate group is big, it can result in a
            # quite significant performance impact. To reduce this, we can send
            # them in smaller batches until the new batch does not provide any
            # improvements.
            #
            # This might reduce the overall efficiency of the compression in
            # some corner cases, but that should also prevent very pathological
            # cases from being an issue. (eg. 20 000 candidates).
            #
            # XXX note that the ordering of the group becomes important as it
            # now impacts the final result. The current order is unprocessed
            # and can be improved.
            if group_chunk_size == 0:
                tested.update(group)
                good = yield tuple(group)
            else:
                prev_good = good
                for start in range(0, len(group), group_chunk_size):
                    sub_group = group[start : start + group_chunk_size]
                    tested.update(sub_group)
                    good = yield tuple(sub_group)
                    if prev_good == good:
                        # this chunk brought no improvement; stop early
                        break

    yield None
836
836
837
837
def _refinedgroups(revlog, p1, p2, cachedelta, snapshot_cache=None):
    """Yield candidate-base groups, refining around any accepted snapshot.

    Coroutine-style generator: the caller sends back the revision it judged
    "good" from the previous group (or None).  A final None signals
    exhaustion.
    """
    good = None
    # First we try to reuse the delta contained in the bundle.
    # (or from the source revlog)
    #
    # This logic only applies to general delta repositories and can be disabled
    # through configuration. Disabling reuse source delta is useful when
    # we want to make sure we recomputed "optimal" deltas.
    debug_info = None
    if cachedelta is not None and cachedelta[2] > DELTA_BASE_REUSE_NO:
        # Assume what we received from the server is a good choice
        # build delta will reuse the cache
        if debug_info is not None:
            debug_info['cached-delta.tested'] += 1
        good = yield (cachedelta[0],)
        if good is not None:
            if debug_info is not None:
                debug_info['cached-delta.accepted'] += 1
            yield None
            return
    if snapshot_cache is None:
        snapshot_cache = SnapshotCache()
    groups = _rawgroups(
        revlog,
        p1,
        p2,
        cachedelta,
        snapshot_cache,
    )
    for candidates in groups:
        good = yield candidates
        if good is not None:
            break

    # If sparse revlog is enabled, we can try to refine the available deltas
    if not revlog._sparserevlog:
        yield None
        return

    # if we have a refinable value, try to refine it
    if good is not None and good not in (p1, p2) and revlog.issnapshot(good):
        # refine snapshot down: walk toward shallower snapshot bases as long
        # as the caller keeps accepting them
        previous = None
        while previous != good:
            previous = good
            base = revlog.deltaparent(good)
            if base == nullrev:
                break
            good = yield (base,)
        # refine snapshot up: try children of the accepted snapshot
        if not snapshot_cache.snapshots:
            snapshot_cache.update(revlog, good + 1)
        previous = None
        while good != previous:
            previous = good
            children = tuple(sorted(c for c in snapshot_cache.snapshots[good]))
            good = yield children

    # NOTE: debug_info is always None here; this block is dormant until the
    # local `_debug_search`-style accounting is wired in.
    if debug_info is not None:
        if good is None:
            debug_info['no-solution'] += 1

    yield None
901
901
902
902
def _rawgroups(revlog, p1, p2, cachedelta, snapshot_cache=None):
    """Provides group of revision to be tested as delta base

    This lower level function focus on emitting delta theorically interresting
    without looking it any practical details.

    The group order aims at providing fast or small candidates first.
    """
    # Why search for delta base if we cannot use a delta base ?
    assert revlog._generaldelta
    # also see issue6056
    sparse = revlog._sparserevlog
    curr = len(revlog)
    prev = curr - 1
    deltachain = lambda rev: revlog._deltachain(rev)[0]

    # exclude already lazy tested base if any
    parents = [p for p in (p1, p2) if p != nullrev]

    if not revlog._deltabothparents and len(parents) == 2:
        parents.sort()
        # To minimize the chance of having to build a fulltext,
        # pick first whichever parent is closest to us (max rev)
        yield (parents[1],)
        # then the other one (min rev) if the first did not fit
        yield (parents[0],)
    elif len(parents) > 0:
        # Test all parents (1 or 2), and keep the best candidate
        yield parents

    if sparse and parents:
        if snapshot_cache is None:
            # map: base-rev: [snapshot-revs]
            snapshot_cache = SnapshotCache()
        # See if we can use an existing snapshot in the parent chains to use as
        # a base for a new intermediate-snapshot
        #
        # search for snapshot in parents delta chain
        # map: snapshot-level: snapshot-rev
        parents_snaps = collections.defaultdict(set)
        candidate_chains = [deltachain(p) for p in parents]
        for chain in candidate_chains:
            for idx, s in enumerate(chain):
                if not revlog.issnapshot(s):
                    break
                parents_snaps[idx].add(s)
        snapfloor = min(parents_snaps[0]) + 1
        snapshot_cache.update(revlog, snapfloor)
        # search for the highest "unrelated" revision
        #
        # Adding snapshots used by "unrelated" revision increase the odd we
        # reuse an independant, yet better snapshot chain.
        #
        # XXX instead of building a set of revisions, we could lazily enumerate
        # over the chains. That would be more efficient, however we stick to
        # simple code for now.
        all_revs = set()
        for chain in candidate_chains:
            all_revs.update(chain)
        other = None
        for r in revlog.revs(prev, snapfloor):
            if r not in all_revs:
                other = r
                break
        if other is not None:
            # To avoid unfair competition, we won't use unrelated intermediate
            # snapshot that are deeper than the ones from the parent delta
            # chain.
            max_depth = max(parents_snaps.keys())
            chain = deltachain(other)
            for depth, s in enumerate(chain):
                if s < snapfloor:
                    continue
                if max_depth < depth:
                    break
                if not revlog.issnapshot(s):
                    break
                parents_snaps[depth].add(s)
        # Test them as possible intermediate snapshot base
        # We test them from highest to lowest level. High level one are more
        # likely to result in small delta
        floor = None
        for idx, snaps in sorted(parents_snaps.items(), reverse=True):
            siblings = set()
            for s in snaps:
                siblings.update(snapshot_cache.snapshots[s])
            # Before considering making a new intermediate snapshot, we check
            # if an existing snapshot, children of base we consider, would be
            # suitable.
            #
            # It give a change to reuse a delta chain "unrelated" to the
            # current revision instead of starting our own. Without such
            # re-use, topological branches would keep reopening new chains.
            # Creating more and more snapshot as the repository grow.

            if floor is not None:
                # We only do this for siblings created after the one in our
                # parent's delta chain. Those created before has less chances
                # to be valid base since our ancestors had to create a new
                # snapshot.
                siblings = [r for r in siblings if floor < r]
            yield tuple(sorted(siblings))
            # then test the base from our parent's delta chain.
            yield tuple(sorted(snaps))
            floor = min(snaps)
        # No suitable base found in the parent chain, search if any full
        # snapshots emitted since parent's base would be a suitable base for an
        # intermediate snapshot.
        #
        # It give a chance to reuse a delta chain unrelated to the current
        # revisions instead of starting our own. Without such re-use,
        # topological branches would keep reopening new full chains. Creating
        # more and more snapshot as the repository grow.
        full = [r for r in snapshot_cache.snapshots[nullrev] if snapfloor <= r]
        yield tuple(sorted(full))

    if not sparse:
        # other approach failed try against prev to hopefully save us a
        # fulltext.
        yield (prev,)
1023
1023
1024
1024
class SnapshotCache:
    """Cache of the snapshot revisions of a revlog.

    ``snapshots`` maps a delta-parent revision to the set of snapshot
    revisions deltaing directly against it (``nullrev`` keys the full
    snapshots).  ``_start_rev``/``_end_rev`` record the inclusive range of
    revisions already scanned, so repeated ``update`` calls only scan the
    not-yet-covered extensions of that range.
    """

    __slots__ = ('snapshots', '_start_rev', '_end_rev')

    def __init__(self):
        # delta parent rev -> set of snapshot revs based on it
        self.snapshots = collections.defaultdict(set)
        # inclusive bounds of the already-scanned range (None, None = empty)
        self._start_rev = None
        self._end_rev = None

    def update(self, revlog, start_rev=0):
        """find snapshots from start_rev to tip"""
        end_rev = len(revlog) - 1
        if start_rev > end_rev:
            return  # requested range is empty, nothing to scan

        if self._start_rev is None:
            # first scan ever
            assert self._end_rev is None
            self._update(revlog, start_rev, end_rev)
        elif not (self._start_rev <= start_rev and end_rev <= self._end_rev):
            # scan only the parts that stick out of the known range
            if start_rev < self._start_rev:
                self._update(revlog, start_rev, self._start_rev - 1)
            if self._end_rev < end_rev:
                self._update(revlog, self._end_rev + 1, end_rev)

        # widen the recorded bounds to cover what was just scanned
        if self._start_rev is None:
            assert self._end_rev is None
            self._end_rev = end_rev
            self._start_rev = start_rev
        else:
            self._start_rev = min(self._start_rev, start_rev)
            self._end_rev = max(self._end_rev, end_rev)
        assert self._start_rev <= self._end_rev, (
            self._start_rev,
            self._end_rev,
        )

    def _update(self, revlog, start_rev, end_rev):
        """internal method that actually does the cache update"""
        # the requested range must lie entirely outside the known range;
        # ``update`` is responsible for splitting requests accordingly.
        assert self._start_rev is None or (
            start_rev < self._start_rev or start_rev > self._end_rev
        ), (self._start_rev, self._end_rev, start_rev, end_rev)
        assert self._start_rev is None or (
            end_rev < self._start_rev or end_rev > self._end_rev
        ), (self._start_rev, self._end_rev, start_rev, end_rev)
        cache = self.snapshots
        if util.safehasattr(revlog.index, b'findsnapshots'):
            # fast path provided by the (native) index implementation
            revlog.index.findsnapshots(cache, start_rev, end_rev)
        else:
            deltaparent = revlog.deltaparent
            issnapshot = revlog.issnapshot
            for rev in revlog.revs(start_rev, end_rev):
                if issnapshot(rev):
                    cache[deltaparent(rev)].add(rev)
1078
1078
1079
1079
1080 class deltacomputer:
1080 class deltacomputer:
1081 def __init__(
1081 def __init__(
1082 self,
1082 self,
1083 revlog,
1083 revlog,
1084 write_debug=None,
1084 write_debug=None,
1085 debug_search=False,
1085 debug_search=False,
1086 debug_info=None,
1086 debug_info=None,
1087 ):
1087 ):
1088 self.revlog = revlog
1088 self.revlog = revlog
1089 self._write_debug = write_debug
1089 self._write_debug = write_debug
1090 self._debug_search = debug_search
1090 if write_debug is None:
1091 self._debug_search = False
1092 else:
1093 self._debug_search = debug_search
1091 self._debug_info = debug_info
1094 self._debug_info = debug_info
1092 self._snapshot_cache = SnapshotCache()
1095 self._snapshot_cache = SnapshotCache()
1093
1096
1094 @property
1097 @property
1095 def _gather_debug(self):
1098 def _gather_debug(self):
1096 return self._write_debug is not None or self._debug_info is not None
1099 return self._write_debug is not None or self._debug_info is not None
1097
1100
1098 def buildtext(self, revinfo, fh):
1101 def buildtext(self, revinfo, fh):
1099 """Builds a fulltext version of a revision
1102 """Builds a fulltext version of a revision
1100
1103
1101 revinfo: revisioninfo instance that contains all needed info
1104 revinfo: revisioninfo instance that contains all needed info
1102 fh: file handle to either the .i or the .d revlog file,
1105 fh: file handle to either the .i or the .d revlog file,
1103 depending on whether it is inlined or not
1106 depending on whether it is inlined or not
1104 """
1107 """
1105 btext = revinfo.btext
1108 btext = revinfo.btext
1106 if btext[0] is not None:
1109 if btext[0] is not None:
1107 return btext[0]
1110 return btext[0]
1108
1111
1109 revlog = self.revlog
1112 revlog = self.revlog
1110 cachedelta = revinfo.cachedelta
1113 cachedelta = revinfo.cachedelta
1111 baserev = cachedelta[0]
1114 baserev = cachedelta[0]
1112 delta = cachedelta[1]
1115 delta = cachedelta[1]
1113
1116
1114 fulltext = btext[0] = _textfromdelta(
1117 fulltext = btext[0] = _textfromdelta(
1115 fh,
1118 fh,
1116 revlog,
1119 revlog,
1117 baserev,
1120 baserev,
1118 delta,
1121 delta,
1119 revinfo.p1,
1122 revinfo.p1,
1120 revinfo.p2,
1123 revinfo.p2,
1121 revinfo.flags,
1124 revinfo.flags,
1122 revinfo.node,
1125 revinfo.node,
1123 )
1126 )
1124 return fulltext
1127 return fulltext
1125
1128
1126 def _builddeltadiff(self, base, revinfo, fh):
1129 def _builddeltadiff(self, base, revinfo, fh):
1127 revlog = self.revlog
1130 revlog = self.revlog
1128 t = self.buildtext(revinfo, fh)
1131 t = self.buildtext(revinfo, fh)
1129 if revlog.iscensored(base):
1132 if revlog.iscensored(base):
1130 # deltas based on a censored revision must replace the
1133 # deltas based on a censored revision must replace the
1131 # full content in one patch, so delta works everywhere
1134 # full content in one patch, so delta works everywhere
1132 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
1135 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
1133 delta = header + t
1136 delta = header + t
1134 else:
1137 else:
1135 ptext = revlog.rawdata(base, _df=fh)
1138 ptext = revlog.rawdata(base, _df=fh)
1136 delta = mdiff.textdiff(ptext, t)
1139 delta = mdiff.textdiff(ptext, t)
1137
1140
1138 return delta
1141 return delta
1139
1142
1140 def _builddeltainfo(self, revinfo, base, fh, target_rev=None):
1143 def _builddeltainfo(self, revinfo, base, fh, target_rev=None):
1141 # can we use the cached delta?
1144 # can we use the cached delta?
1142 revlog = self.revlog
1145 revlog = self.revlog
1143 debug_search = self._write_debug is not None and self._debug_search
1144 chainbase = revlog.chainbase(base)
1146 chainbase = revlog.chainbase(base)
1145 if revlog._generaldelta:
1147 if revlog._generaldelta:
1146 deltabase = base
1148 deltabase = base
1147 else:
1149 else:
1148 if target_rev is not None and base != target_rev - 1:
1150 if target_rev is not None and base != target_rev - 1:
1149 msg = (
1151 msg = (
1150 b'general delta cannot use delta for something else '
1152 b'general delta cannot use delta for something else '
1151 b'than `prev`: %d<-%d'
1153 b'than `prev`: %d<-%d'
1152 )
1154 )
1153 msg %= (base, target_rev)
1155 msg %= (base, target_rev)
1154 raise error.ProgrammingError(msg)
1156 raise error.ProgrammingError(msg)
1155 deltabase = chainbase
1157 deltabase = chainbase
1156 snapshotdepth = None
1158 snapshotdepth = None
1157 if revlog._sparserevlog and deltabase == nullrev:
1159 if revlog._sparserevlog and deltabase == nullrev:
1158 snapshotdepth = 0
1160 snapshotdepth = 0
1159 elif revlog._sparserevlog and revlog.issnapshot(deltabase):
1161 elif revlog._sparserevlog and revlog.issnapshot(deltabase):
1160 # A delta chain should always be one full snapshot,
1162 # A delta chain should always be one full snapshot,
1161 # zero or more semi-snapshots, and zero or more deltas
1163 # zero or more semi-snapshots, and zero or more deltas
1162 p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
1164 p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
1163 if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
1165 if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
1164 snapshotdepth = len(revlog._deltachain(deltabase)[0])
1166 snapshotdepth = len(revlog._deltachain(deltabase)[0])
1165 delta = None
1167 delta = None
1166 if revinfo.cachedelta:
1168 if revinfo.cachedelta:
1167 cachebase = revinfo.cachedelta[0]
1169 cachebase = revinfo.cachedelta[0]
1168 # check if the diff still apply
1170 # check if the diff still apply
1169 currentbase = cachebase
1171 currentbase = cachebase
1170 while (
1172 while (
1171 currentbase != nullrev
1173 currentbase != nullrev
1172 and currentbase != base
1174 and currentbase != base
1173 and self.revlog.length(currentbase) == 0
1175 and self.revlog.length(currentbase) == 0
1174 ):
1176 ):
1175 currentbase = self.revlog.deltaparent(currentbase)
1177 currentbase = self.revlog.deltaparent(currentbase)
1176 if self.revlog._lazydelta and currentbase == base:
1178 if self.revlog._lazydelta and currentbase == base:
1177 delta = revinfo.cachedelta[1]
1179 delta = revinfo.cachedelta[1]
1178 if delta is None:
1180 if delta is None:
1179 delta = self._builddeltadiff(base, revinfo, fh)
1181 delta = self._builddeltadiff(base, revinfo, fh)
1180 if debug_search:
1182 if self._debug_search:
1181 msg = b"DBG-DELTAS-SEARCH: uncompressed-delta-size=%d\n"
1183 msg = b"DBG-DELTAS-SEARCH: uncompressed-delta-size=%d\n"
1182 msg %= len(delta)
1184 msg %= len(delta)
1183 self._write_debug(msg)
1185 self._write_debug(msg)
1184 # snapshotdept need to be neither None nor 0 level snapshot
1186 # snapshotdept need to be neither None nor 0 level snapshot
1185 if revlog.upperboundcomp is not None and snapshotdepth:
1187 if revlog.upperboundcomp is not None and snapshotdepth:
1186 lowestrealisticdeltalen = len(delta) // revlog.upperboundcomp
1188 lowestrealisticdeltalen = len(delta) // revlog.upperboundcomp
1187 snapshotlimit = revinfo.textlen >> snapshotdepth
1189 snapshotlimit = revinfo.textlen >> snapshotdepth
1188 if debug_search:
1190 if self._debug_search:
1189 msg = b"DBG-DELTAS-SEARCH: projected-lower-size=%d\n"
1191 msg = b"DBG-DELTAS-SEARCH: projected-lower-size=%d\n"
1190 msg %= lowestrealisticdeltalen
1192 msg %= lowestrealisticdeltalen
1191 self._write_debug(msg)
1193 self._write_debug(msg)
1192 if snapshotlimit < lowestrealisticdeltalen:
1194 if snapshotlimit < lowestrealisticdeltalen:
1193 if debug_search:
1195 if self._debug_search:
1194 msg = b"DBG-DELTAS-SEARCH: DISCARDED (snapshot limit)\n"
1196 msg = b"DBG-DELTAS-SEARCH: DISCARDED (snapshot limit)\n"
1195 self._write_debug(msg)
1197 self._write_debug(msg)
1196 return None
1198 return None
1197 if revlog.length(base) < lowestrealisticdeltalen:
1199 if revlog.length(base) < lowestrealisticdeltalen:
1198 if debug_search:
1200 if self._debug_search:
1199 msg = b"DBG-DELTAS-SEARCH: DISCARDED (prev size)\n"
1201 msg = b"DBG-DELTAS-SEARCH: DISCARDED (prev size)\n"
1200 self._write_debug(msg)
1202 self._write_debug(msg)
1201 return None
1203 return None
1202 header, data = revlog.compress(delta)
1204 header, data = revlog.compress(delta)
1203 deltalen = len(header) + len(data)
1205 deltalen = len(header) + len(data)
1204 offset = revlog.end(len(revlog) - 1)
1206 offset = revlog.end(len(revlog) - 1)
1205 dist = deltalen + offset - revlog.start(chainbase)
1207 dist = deltalen + offset - revlog.start(chainbase)
1206 chainlen, compresseddeltalen = revlog._chaininfo(base)
1208 chainlen, compresseddeltalen = revlog._chaininfo(base)
1207 chainlen += 1
1209 chainlen += 1
1208 compresseddeltalen += deltalen
1210 compresseddeltalen += deltalen
1209
1211
1210 return _deltainfo(
1212 return _deltainfo(
1211 dist,
1213 dist,
1212 deltalen,
1214 deltalen,
1213 (header, data),
1215 (header, data),
1214 deltabase,
1216 deltabase,
1215 chainbase,
1217 chainbase,
1216 chainlen,
1218 chainlen,
1217 compresseddeltalen,
1219 compresseddeltalen,
1218 snapshotdepth,
1220 snapshotdepth,
1219 )
1221 )
1220
1222
1221 def _fullsnapshotinfo(self, fh, revinfo, curr):
1223 def _fullsnapshotinfo(self, fh, revinfo, curr):
1222 rawtext = self.buildtext(revinfo, fh)
1224 rawtext = self.buildtext(revinfo, fh)
1223 data = self.revlog.compress(rawtext)
1225 data = self.revlog.compress(rawtext)
1224 compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
1226 compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
1225 deltabase = chainbase = curr
1227 deltabase = chainbase = curr
1226 snapshotdepth = 0
1228 snapshotdepth = 0
1227 chainlen = 1
1229 chainlen = 1
1228
1230
1229 return _deltainfo(
1231 return _deltainfo(
1230 dist,
1232 dist,
1231 deltalen,
1233 deltalen,
1232 data,
1234 data,
1233 deltabase,
1235 deltabase,
1234 chainbase,
1236 chainbase,
1235 chainlen,
1237 chainlen,
1236 compresseddeltalen,
1238 compresseddeltalen,
1237 snapshotdepth,
1239 snapshotdepth,
1238 )
1240 )
1239
1241
1240 def finddeltainfo(self, revinfo, fh, excluded_bases=None, target_rev=None):
1242 def finddeltainfo(self, revinfo, fh, excluded_bases=None, target_rev=None):
1241 """Find an acceptable delta against a candidate revision
1243 """Find an acceptable delta against a candidate revision
1242
1244
1243 revinfo: information about the revision (instance of _revisioninfo)
1245 revinfo: information about the revision (instance of _revisioninfo)
1244 fh: file handle to either the .i or the .d revlog file,
1246 fh: file handle to either the .i or the .d revlog file,
1245 depending on whether it is inlined or not
1247 depending on whether it is inlined or not
1246
1248
1247 Returns the first acceptable candidate revision, as ordered by
1249 Returns the first acceptable candidate revision, as ordered by
1248 _candidategroups
1250 _candidategroups
1249
1251
1250 If no suitable deltabase is found, we return delta info for a full
1252 If no suitable deltabase is found, we return delta info for a full
1251 snapshot.
1253 snapshot.
1252
1254
1253 `excluded_bases` is an optional set of revision that cannot be used as
1255 `excluded_bases` is an optional set of revision that cannot be used as
1254 a delta base. Use this to recompute delta suitable in censor or strip
1256 a delta base. Use this to recompute delta suitable in censor or strip
1255 context.
1257 context.
1256 """
1258 """
1257 if target_rev is None:
1259 if target_rev is None:
1258 target_rev = len(self.revlog)
1260 target_rev = len(self.revlog)
1259
1261
1260 if not revinfo.textlen:
1262 if not revinfo.textlen:
1261 return self._fullsnapshotinfo(fh, revinfo, target_rev)
1263 return self._fullsnapshotinfo(fh, revinfo, target_rev)
1262
1264
1263 if excluded_bases is None:
1265 if excluded_bases is None:
1264 excluded_bases = set()
1266 excluded_bases = set()
1265
1267
1266 # no delta for flag processor revision (see "candelta" for why)
1268 # no delta for flag processor revision (see "candelta" for why)
1267 # not calling candelta since only one revision needs test, also to
1269 # not calling candelta since only one revision needs test, also to
1268 # avoid overhead fetching flags again.
1270 # avoid overhead fetching flags again.
1269 if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
1271 if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
1270 return self._fullsnapshotinfo(fh, revinfo, target_rev)
1272 return self._fullsnapshotinfo(fh, revinfo, target_rev)
1271
1273
1272 debug_search = self._write_debug is not None and self._debug_search
1273 gather_debug = self._gather_debug
1274 gather_debug = self._gather_debug
1274
1275
1275 if gather_debug:
1276 if gather_debug:
1276 start = util.timer()
1277 start = util.timer()
1277
1278
1278 # count the number of different delta we tried (for debug purpose)
1279 # count the number of different delta we tried (for debug purpose)
1279 dbg_try_count = 0
1280 dbg_try_count = 0
1280 # count the number of "search round" we did. (for debug purpose)
1281 # count the number of "search round" we did. (for debug purpose)
1281 dbg_try_rounds = 0
1282 dbg_try_rounds = 0
1282 dbg_type = b'unknown'
1283 dbg_type = b'unknown'
1283
1284
1284 cachedelta = revinfo.cachedelta
1285 cachedelta = revinfo.cachedelta
1285 p1 = revinfo.p1
1286 p1 = revinfo.p1
1286 p2 = revinfo.p2
1287 p2 = revinfo.p2
1287 revlog = self.revlog
1288 revlog = self.revlog
1288
1289
1289 deltainfo = None
1290 deltainfo = None
1290 p1r, p2r = revlog.rev(p1), revlog.rev(p2)
1291 p1r, p2r = revlog.rev(p1), revlog.rev(p2)
1291
1292
1292 if gather_debug:
1293 if gather_debug:
1293 if p1r != nullrev:
1294 if p1r != nullrev:
1294 p1_chain_len = revlog._chaininfo(p1r)[0]
1295 p1_chain_len = revlog._chaininfo(p1r)[0]
1295 else:
1296 else:
1296 p1_chain_len = -1
1297 p1_chain_len = -1
1297 if p2r != nullrev:
1298 if p2r != nullrev:
1298 p2_chain_len = revlog._chaininfo(p2r)[0]
1299 p2_chain_len = revlog._chaininfo(p2r)[0]
1299 else:
1300 else:
1300 p2_chain_len = -1
1301 p2_chain_len = -1
1301 if debug_search:
1302 if self._debug_search:
1302 msg = b"DBG-DELTAS-SEARCH: SEARCH rev=%d\n"
1303 msg = b"DBG-DELTAS-SEARCH: SEARCH rev=%d\n"
1303 msg %= target_rev
1304 msg %= target_rev
1304 self._write_debug(msg)
1305 self._write_debug(msg)
1305
1306
1306 groups = _candidategroups(
1307 groups = _candidategroups(
1307 self.revlog,
1308 self.revlog,
1308 revinfo.textlen,
1309 revinfo.textlen,
1309 p1r,
1310 p1r,
1310 p2r,
1311 p2r,
1311 cachedelta,
1312 cachedelta,
1312 excluded_bases,
1313 excluded_bases,
1313 target_rev,
1314 target_rev,
1314 snapshot_cache=self._snapshot_cache,
1315 snapshot_cache=self._snapshot_cache,
1315 )
1316 )
1316 candidaterevs = next(groups)
1317 candidaterevs = next(groups)
1317 while candidaterevs is not None:
1318 while candidaterevs is not None:
1318 dbg_try_rounds += 1
1319 dbg_try_rounds += 1
1319 if debug_search:
1320 if self._debug_search:
1320 prev = None
1321 prev = None
1321 if deltainfo is not None:
1322 if deltainfo is not None:
1322 prev = deltainfo.base
1323 prev = deltainfo.base
1323
1324
1324 if (
1325 if (
1325 cachedelta is not None
1326 cachedelta is not None
1326 and len(candidaterevs) == 1
1327 and len(candidaterevs) == 1
1327 and cachedelta[0] in candidaterevs
1328 and cachedelta[0] in candidaterevs
1328 ):
1329 ):
1329 round_type = b"cached-delta"
1330 round_type = b"cached-delta"
1330 elif p1 in candidaterevs or p2 in candidaterevs:
1331 elif p1 in candidaterevs or p2 in candidaterevs:
1331 round_type = b"parents"
1332 round_type = b"parents"
1332 elif prev is not None and all(c < prev for c in candidaterevs):
1333 elif prev is not None and all(c < prev for c in candidaterevs):
1333 round_type = b"refine-down"
1334 round_type = b"refine-down"
1334 elif prev is not None and all(c > prev for c in candidaterevs):
1335 elif prev is not None and all(c > prev for c in candidaterevs):
1335 round_type = b"refine-up"
1336 round_type = b"refine-up"
1336 else:
1337 else:
1337 round_type = b"search-down"
1338 round_type = b"search-down"
1338 msg = b"DBG-DELTAS-SEARCH: ROUND #%d - %d candidates - %s\n"
1339 msg = b"DBG-DELTAS-SEARCH: ROUND #%d - %d candidates - %s\n"
1339 msg %= (dbg_try_rounds, len(candidaterevs), round_type)
1340 msg %= (dbg_try_rounds, len(candidaterevs), round_type)
1340 self._write_debug(msg)
1341 self._write_debug(msg)
1341 nominateddeltas = []
1342 nominateddeltas = []
1342 if deltainfo is not None:
1343 if deltainfo is not None:
1343 if debug_search:
1344 if self._debug_search:
1344 msg = (
1345 msg = (
1345 b"DBG-DELTAS-SEARCH: CONTENDER: rev=%d - length=%d\n"
1346 b"DBG-DELTAS-SEARCH: CONTENDER: rev=%d - length=%d\n"
1346 )
1347 )
1347 msg %= (deltainfo.base, deltainfo.deltalen)
1348 msg %= (deltainfo.base, deltainfo.deltalen)
1348 self._write_debug(msg)
1349 self._write_debug(msg)
1349 # if we already found a good delta,
1350 # if we already found a good delta,
1350 # challenge it against refined candidates
1351 # challenge it against refined candidates
1351 nominateddeltas.append(deltainfo)
1352 nominateddeltas.append(deltainfo)
1352 for candidaterev in candidaterevs:
1353 for candidaterev in candidaterevs:
1353 if debug_search:
1354 if self._debug_search:
1354 msg = b"DBG-DELTAS-SEARCH: CANDIDATE: rev=%d\n"
1355 msg = b"DBG-DELTAS-SEARCH: CANDIDATE: rev=%d\n"
1355 msg %= candidaterev
1356 msg %= candidaterev
1356 self._write_debug(msg)
1357 self._write_debug(msg)
1357 candidate_type = None
1358 candidate_type = None
1358 if candidaterev == p1:
1359 if candidaterev == p1:
1359 candidate_type = b"p1"
1360 candidate_type = b"p1"
1360 elif candidaterev == p2:
1361 elif candidaterev == p2:
1361 candidate_type = b"p2"
1362 candidate_type = b"p2"
1362 elif self.revlog.issnapshot(candidaterev):
1363 elif self.revlog.issnapshot(candidaterev):
1363 candidate_type = b"snapshot-%d"
1364 candidate_type = b"snapshot-%d"
1364 candidate_type %= self.revlog.snapshotdepth(
1365 candidate_type %= self.revlog.snapshotdepth(
1365 candidaterev
1366 candidaterev
1366 )
1367 )
1367
1368
1368 if candidate_type is not None:
1369 if candidate_type is not None:
1369 msg = b"DBG-DELTAS-SEARCH: type=%s\n"
1370 msg = b"DBG-DELTAS-SEARCH: type=%s\n"
1370 msg %= candidate_type
1371 msg %= candidate_type
1371 self._write_debug(msg)
1372 self._write_debug(msg)
1372 msg = b"DBG-DELTAS-SEARCH: size=%d\n"
1373 msg = b"DBG-DELTAS-SEARCH: size=%d\n"
1373 msg %= self.revlog.length(candidaterev)
1374 msg %= self.revlog.length(candidaterev)
1374 self._write_debug(msg)
1375 self._write_debug(msg)
1375 msg = b"DBG-DELTAS-SEARCH: base=%d\n"
1376 msg = b"DBG-DELTAS-SEARCH: base=%d\n"
1376 msg %= self.revlog.deltaparent(candidaterev)
1377 msg %= self.revlog.deltaparent(candidaterev)
1377 self._write_debug(msg)
1378 self._write_debug(msg)
1378
1379
1379 dbg_try_count += 1
1380 dbg_try_count += 1
1380
1381
1381 if debug_search:
1382 if self._debug_search:
1382 delta_start = util.timer()
1383 delta_start = util.timer()
1383 candidatedelta = self._builddeltainfo(
1384 candidatedelta = self._builddeltainfo(
1384 revinfo,
1385 revinfo,
1385 candidaterev,
1386 candidaterev,
1386 fh,
1387 fh,
1387 target_rev=target_rev,
1388 target_rev=target_rev,
1388 )
1389 )
1389 if debug_search:
1390 if self._debug_search:
1390 delta_end = util.timer()
1391 delta_end = util.timer()
1391 msg = b"DBG-DELTAS-SEARCH: delta-search-time=%f\n"
1392 msg = b"DBG-DELTAS-SEARCH: delta-search-time=%f\n"
1392 msg %= delta_end - delta_start
1393 msg %= delta_end - delta_start
1393 self._write_debug(msg)
1394 self._write_debug(msg)
1394 if candidatedelta is not None:
1395 if candidatedelta is not None:
1395 if is_good_delta_info(self.revlog, candidatedelta, revinfo):
1396 if is_good_delta_info(self.revlog, candidatedelta, revinfo):
1396 if debug_search:
1397 if self._debug_search:
1397 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (GOOD)\n"
1398 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (GOOD)\n"
1398 msg %= candidatedelta.deltalen
1399 msg %= candidatedelta.deltalen
1399 self._write_debug(msg)
1400 self._write_debug(msg)
1400 nominateddeltas.append(candidatedelta)
1401 nominateddeltas.append(candidatedelta)
1401 elif debug_search:
1402 elif self._debug_search:
1402 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (BAD)\n"
1403 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (BAD)\n"
1403 msg %= candidatedelta.deltalen
1404 msg %= candidatedelta.deltalen
1404 self._write_debug(msg)
1405 self._write_debug(msg)
1405 elif debug_search:
1406 elif self._debug_search:
1406 msg = b"DBG-DELTAS-SEARCH: NO-DELTA\n"
1407 msg = b"DBG-DELTAS-SEARCH: NO-DELTA\n"
1407 self._write_debug(msg)
1408 self._write_debug(msg)
1408 if nominateddeltas:
1409 if nominateddeltas:
1409 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
1410 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
1410 if deltainfo is not None:
1411 if deltainfo is not None:
1411 candidaterevs = groups.send(deltainfo.base)
1412 candidaterevs = groups.send(deltainfo.base)
1412 else:
1413 else:
1413 candidaterevs = next(groups)
1414 candidaterevs = next(groups)
1414
1415
1415 if deltainfo is None:
1416 if deltainfo is None:
1416 dbg_type = b"full"
1417 dbg_type = b"full"
1417 deltainfo = self._fullsnapshotinfo(fh, revinfo, target_rev)
1418 deltainfo = self._fullsnapshotinfo(fh, revinfo, target_rev)
1418 elif deltainfo.snapshotdepth: # pytype: disable=attribute-error
1419 elif deltainfo.snapshotdepth: # pytype: disable=attribute-error
1419 dbg_type = b"snapshot"
1420 dbg_type = b"snapshot"
1420 else:
1421 else:
1421 dbg_type = b"delta"
1422 dbg_type = b"delta"
1422
1423
1423 if gather_debug:
1424 if gather_debug:
1424 end = util.timer()
1425 end = util.timer()
1425 if dbg_type == b'full':
1426 if dbg_type == b'full':
1426 used_cached = (
1427 used_cached = (
1427 cachedelta is not None
1428 cachedelta is not None
1428 and dbg_try_rounds == 0
1429 and dbg_try_rounds == 0
1429 and dbg_try_count == 0
1430 and dbg_try_count == 0
1430 and cachedelta[0] == nullrev
1431 and cachedelta[0] == nullrev
1431 )
1432 )
1432 else:
1433 else:
1433 used_cached = (
1434 used_cached = (
1434 cachedelta is not None
1435 cachedelta is not None
1435 and dbg_try_rounds == 1
1436 and dbg_try_rounds == 1
1436 and dbg_try_count == 1
1437 and dbg_try_count == 1
1437 and deltainfo.base == cachedelta[0]
1438 and deltainfo.base == cachedelta[0]
1438 )
1439 )
1439 dbg = {
1440 dbg = {
1440 'duration': end - start,
1441 'duration': end - start,
1441 'revision': target_rev,
1442 'revision': target_rev,
1442 'delta-base': deltainfo.base, # pytype: disable=attribute-error
1443 'delta-base': deltainfo.base, # pytype: disable=attribute-error
1443 'search_round_count': dbg_try_rounds,
1444 'search_round_count': dbg_try_rounds,
1444 'using-cached-base': used_cached,
1445 'using-cached-base': used_cached,
1445 'delta_try_count': dbg_try_count,
1446 'delta_try_count': dbg_try_count,
1446 'type': dbg_type,
1447 'type': dbg_type,
1447 'p1-chain-len': p1_chain_len,
1448 'p1-chain-len': p1_chain_len,
1448 'p2-chain-len': p2_chain_len,
1449 'p2-chain-len': p2_chain_len,
1449 }
1450 }
1450 if (
1451 if (
1451 deltainfo.snapshotdepth # pytype: disable=attribute-error
1452 deltainfo.snapshotdepth # pytype: disable=attribute-error
1452 is not None
1453 is not None
1453 ):
1454 ):
1454 dbg[
1455 dbg[
1455 'snapshot-depth'
1456 'snapshot-depth'
1456 ] = deltainfo.snapshotdepth # pytype: disable=attribute-error
1457 ] = deltainfo.snapshotdepth # pytype: disable=attribute-error
1457 else:
1458 else:
1458 dbg['snapshot-depth'] = 0
1459 dbg['snapshot-depth'] = 0
1459 target_revlog = b"UNKNOWN"
1460 target_revlog = b"UNKNOWN"
1460 target_type = self.revlog.target[0]
1461 target_type = self.revlog.target[0]
1461 target_key = self.revlog.target[1]
1462 target_key = self.revlog.target[1]
1462 if target_type == KIND_CHANGELOG:
1463 if target_type == KIND_CHANGELOG:
1463 target_revlog = b'CHANGELOG:'
1464 target_revlog = b'CHANGELOG:'
1464 elif target_type == KIND_MANIFESTLOG:
1465 elif target_type == KIND_MANIFESTLOG:
1465 target_revlog = b'MANIFESTLOG:'
1466 target_revlog = b'MANIFESTLOG:'
1466 if target_key:
1467 if target_key:
1467 target_revlog += b'%s:' % target_key
1468 target_revlog += b'%s:' % target_key
1468 elif target_type == KIND_FILELOG:
1469 elif target_type == KIND_FILELOG:
1469 target_revlog = b'FILELOG:'
1470 target_revlog = b'FILELOG:'
1470 if target_key:
1471 if target_key:
1471 target_revlog += b'%s:' % target_key
1472 target_revlog += b'%s:' % target_key
1472 dbg['target-revlog'] = target_revlog
1473 dbg['target-revlog'] = target_revlog
1473
1474
1474 if self._debug_info is not None:
1475 if self._debug_info is not None:
1475 self._debug_info.append(dbg)
1476 self._debug_info.append(dbg)
1476
1477
1477 if self._write_debug is not None:
1478 if self._write_debug is not None:
1478 msg = (
1479 msg = (
1479 b"DBG-DELTAS:"
1480 b"DBG-DELTAS:"
1480 b" %-12s"
1481 b" %-12s"
1481 b" rev=%d:"
1482 b" rev=%d:"
1482 b" delta-base=%d"
1483 b" delta-base=%d"
1483 b" is-cached=%d"
1484 b" is-cached=%d"
1484 b" - search-rounds=%d"
1485 b" - search-rounds=%d"
1485 b" try-count=%d"
1486 b" try-count=%d"
1486 b" - delta-type=%-6s"
1487 b" - delta-type=%-6s"
1487 b" snap-depth=%d"
1488 b" snap-depth=%d"
1488 b" - p1-chain-length=%d"
1489 b" - p1-chain-length=%d"
1489 b" p2-chain-length=%d"
1490 b" p2-chain-length=%d"
1490 b" - duration=%f"
1491 b" - duration=%f"
1491 b"\n"
1492 b"\n"
1492 )
1493 )
1493 msg %= (
1494 msg %= (
1494 dbg["target-revlog"],
1495 dbg["target-revlog"],
1495 dbg["revision"],
1496 dbg["revision"],
1496 dbg["delta-base"],
1497 dbg["delta-base"],
1497 dbg["using-cached-base"],
1498 dbg["using-cached-base"],
1498 dbg["search_round_count"],
1499 dbg["search_round_count"],
1499 dbg["delta_try_count"],
1500 dbg["delta_try_count"],
1500 dbg["type"],
1501 dbg["type"],
1501 dbg["snapshot-depth"],
1502 dbg["snapshot-depth"],
1502 dbg["p1-chain-len"],
1503 dbg["p1-chain-len"],
1503 dbg["p2-chain-len"],
1504 dbg["p2-chain-len"],
1504 dbg["duration"],
1505 dbg["duration"],
1505 )
1506 )
1506 self._write_debug(msg)
1507 self._write_debug(msg)
1507 return deltainfo
1508 return deltainfo
1508
1509
1509
1510
def delta_compression(default_compression_header, deltainfo):
    """return (COMPRESSION_MODE, deltainfo)

    used by revlog v2+ format to dispatch between PLAIN and DEFAULT
    compression.
    """
    header, body = deltainfo.data
    mode = COMP_MODE_INLINE
    if not header and not body:
        # no data to store at all... declare them uncompressed
        mode = COMP_MODE_PLAIN
    elif not header:
        first = body[0:1]
        if first == b'\0':
            # explicit "uncompressed" marker byte
            mode = COMP_MODE_PLAIN
        elif first == default_compression_header:
            # matches the revlog's default engine, no header needed
            mode = COMP_MODE_DEFAULT
    elif header == b'u':
        # we have a more efficient way to declare uncompressed
        deltainfo = drop_u_compression(deltainfo)
        mode = COMP_MODE_PLAIN
    return mode, deltainfo
General Comments 0
You need to be logged in to leave comments. Login now