delta-find: remove dead code intended to deal with forced delta reuse...
marmoute
r51591:f1b57672 default
@@ -1,1631 +1,1624 b''
# revlogdeltas.py - Logic around delta computation for revlog
#
# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
# Copyright 2018 Octobus <contact@octobus.net>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
"""Helper class to compute deltas stored inside revlogs"""


import collections
import struct

# import stuff from node for others to import from revlog
from ..node import nullrev
from ..i18n import _
from ..pycompat import getattr

from .constants import (
    COMP_MODE_DEFAULT,
    COMP_MODE_INLINE,
    COMP_MODE_PLAIN,
    DELTA_BASE_REUSE_FORCE,
    DELTA_BASE_REUSE_NO,
    KIND_CHANGELOG,
    KIND_FILELOG,
    KIND_MANIFESTLOG,
    REVIDX_ISCENSORED,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
)

from ..thirdparty import attr

from .. import (
    error,
    mdiff,
    util,
)

from . import flagutil

# maximum <delta-chain-data>/<revision-text-length> ratio
LIMIT_DELTA2TEXT = 2
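# For illustration: with LIMIT_DELTA2TEXT = 2, a revision whose full text is
# 1000 bytes may accumulate at most 2 * 1000 = 2000 bytes of delta data along
# its chain before a candidate is rejected (see is_good_delta_info and
# _candidategroups below).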


class _testrevlog:
    """minimalist fake revlog to use in doctests"""

    def __init__(self, data, density=0.5, mingap=0, snapshot=()):
        """data is a list of revision payload boundaries"""
        self._data = data
        self._srdensitythreshold = density
        self._srmingapsize = mingap
        self._snapshot = set(snapshot)
        self.index = None

    def start(self, rev):
        if rev == nullrev:
            return 0
        if rev == 0:
            return 0
        return self._data[rev - 1]

    def end(self, rev):
        if rev == nullrev:
            return 0
        return self._data[rev]

    def length(self, rev):
        return self.end(rev) - self.start(rev)

    def __len__(self):
        return len(self._data)

    def issnapshot(self, rev):
        if rev == nullrev:
            return True
        return rev in self._snapshot
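    # Example (illustrative): with data = [5, 10, 12], revision 0 occupies
    # bytes [0, 5), revision 1 bytes [5, 10) and revision 2 bytes [10, 12),
    # so length(1) == 5 and length(2) == 2.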


def slicechunk(revlog, revs, targetsize=None):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `revlog._srdensitythreshold`. No gap smaller than
    `revlog._srmingapsize` is skipped.

    If `targetsize` is set, no chunk larger than `targetsize` will be yielded.
    For consistency with other slicing choices, this limit won't go lower than
    `revlog._srmingapsize`.

    If an individual revision chunk is larger than this limit, it will still
    be yielded individually.

    >>> data = [
    ...     5,  #00 (5)
    ...     10, #01 (5)
    ...     12, #02 (2)
    ...     12, #03 (empty)
    ...     27, #04 (15)
    ...     31, #05 (4)
    ...     31, #06 (empty)
    ...     42, #07 (11)
    ...     47, #08 (5)
    ...     47, #09 (empty)
    ...     48, #10 (1)
    ...     51, #11 (3)
    ...     74, #12 (23)
    ...     85, #13 (11)
    ...     86, #14 (1)
    ...     91, #15 (5)
    ... ]
    >>> revlog = _testrevlog(data, snapshot=range(16))

    >>> list(slicechunk(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(slicechunk(revlog, [0, 15]))
    [[0], [15]]
    >>> list(slicechunk(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]

    Slicing with a maximum chunk size
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
    [[0], [11], [13], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
    [[0], [11], [13, 15]]

    Slicing involving nullrev
    >>> list(slicechunk(revlog, [-1, 0, 11, 13, 15], targetsize=20))
    [[-1, 0], [11], [13, 15]]
    >>> list(slicechunk(revlog, [-1, 13, 15], targetsize=5))
    [[-1], [13], [15]]
    """
    if targetsize is not None:
        targetsize = max(targetsize, revlog._srmingapsize)
    # targetsize should not be specified when evaluating delta candidates:
    # * targetsize is used to ensure we stay within specification when reading,
    densityslicing = getattr(revlog.index, 'slicechunktodensity', None)
    if densityslicing is None:
        densityslicing = lambda x, y, z: _slicechunktodensity(revlog, x, y, z)
    for chunk in densityslicing(
        revs, revlog._srdensitythreshold, revlog._srmingapsize
    ):
        for subchunk in _slicechunktosize(revlog, chunk, targetsize):
            yield subchunk


def _slicechunktosize(revlog, revs, targetsize=None):
    """slice revs to match the target size

    This is intended to be used on chunks that density slicing selected but
    that are still too large compared to the read guarantee of revlogs. This
    might happen when the "minimal gap size" interrupted the slicing, or when
    chains are built in a way that creates large blocks next to each other.

    >>> data = [
    ...     3,  #0 (3)
    ...     5,  #1 (2)
    ...     6,  #2 (1)
    ...     8,  #3 (2)
    ...     8,  #4 (empty)
    ...     11, #5 (3)
    ...     12, #6 (1)
    ...     13, #7 (1)
    ...     14, #8 (1)
    ... ]

    == All snapshots cases ==
    >>> revlog = _testrevlog(data, snapshot=range(9))

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1, 2], [3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]

    == No Snapshot cases ==
    >>> revlog = _testrevlog(data)

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1], [2, 3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [4, 5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]

    == mixed case ==
    >>> revlog = _testrevlog(data, snapshot=[0, 1, 2])
    >>> list(_slicechunktosize(revlog, list(range(9)), 5))
    [[0, 1], [2], [3, 4, 5], [6, 7, 8]]
    """
    assert targetsize is None or 0 <= targetsize
    startdata = revlog.start(revs[0])
    enddata = revlog.end(revs[-1])
    fullspan = enddata - startdata
    if targetsize is None or fullspan <= targetsize:
        yield revs
        return

    startrevidx = 0
    endrevidx = 1
    iterrevs = enumerate(revs)
    next(iterrevs)  # skip first rev.
    # first step: get snapshots out of the way
    for idx, r in iterrevs:
        span = revlog.end(r) - startdata
        snapshot = revlog.issnapshot(r)
        if span <= targetsize and snapshot:
            endrevidx = idx + 1
        else:
            chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
            if chunk:
                yield chunk
            startrevidx = idx
            startdata = revlog.start(r)
            endrevidx = idx + 1
        if not snapshot:
            break

    # for the others, we use binary slicing to quickly converge toward valid
    # chunks (otherwise, we might end up looking for start/end of many
    # revisions). This logic is not looking for the perfect slicing point, it
    # focuses on quickly converging toward valid chunks.
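    # Illustrative walk-through: with the remaining span still above
    # targetsize, each inner iteration below moves endrevidx halfway toward
    # startrevidx (endrevidx -= (endrevidx - startrevidx) // 2) until the
    # byte span of revs[startrevidx:endrevidx] fits, so a chunk boundary is
    # found in O(log(len(revs))) start()/end() lookups instead of a linear
    # scan.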
    nbitem = len(revs)
    while (enddata - startdata) > targetsize:
        endrevidx = nbitem
        if nbitem - startrevidx <= 1:
            break  # protect against individual chunk larger than limit
        localenddata = revlog.end(revs[endrevidx - 1])
        span = localenddata - startdata
        while span > targetsize:
            if endrevidx - startrevidx <= 1:
                break  # protect against individual chunk larger than limit
            endrevidx -= (endrevidx - startrevidx) // 2
            localenddata = revlog.end(revs[endrevidx - 1])
            span = localenddata - startdata
        chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
        if chunk:
            yield chunk
        startrevidx = endrevidx
        startdata = revlog.start(revs[startrevidx])

    chunk = _trimchunk(revlog, revs, startrevidx)
    if chunk:
        yield chunk


def _slicechunktodensity(revlog, revs, targetdensity=0.5, mingapsize=0):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
    skipped.

    >>> revlog = _testrevlog([
    ...     5,  #00 (5)
    ...     10, #01 (5)
    ...     12, #02 (2)
    ...     12, #03 (empty)
    ...     27, #04 (15)
    ...     31, #05 (4)
    ...     31, #06 (empty)
    ...     42, #07 (11)
    ...     47, #08 (5)
    ...     47, #09 (empty)
    ...     48, #10 (1)
    ...     51, #11 (3)
    ...     74, #12 (23)
    ...     85, #13 (11)
    ...     86, #14 (1)
    ...     91, #15 (5)
    ... ])

    >>> list(_slicechunktodensity(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(_slicechunktodensity(revlog, [0, 15]))
    [[0], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           mingapsize=20))
    [[1, 2, 3, 5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95))
    [[1, 2], [5], [8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95, mingapsize=12))
    [[1, 2], [5, 8, 10, 11], [14]]
    """
    start = revlog.start
    length = revlog.length

    if len(revs) <= 1:
        yield revs
        return

    deltachainspan = segmentspan(revlog, revs)

    if deltachainspan < mingapsize:
        yield revs
        return

    readdata = deltachainspan
    chainpayload = sum(length(r) for r in revs)

    if deltachainspan:
        density = chainpayload / float(deltachainspan)
    else:
        density = 1.0

    if density >= targetdensity:
        yield revs
        return
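    # Worked example (illustrative): for revs [1, 2] of the doctest revlog
    # above, the payload is 5 + 2 = 7 bytes over a 7-byte span, so density is
    # 1.0 and the group is emitted as-is; for [0, 15] the 10 bytes of payload
    # spread over a 91-byte span give density ~0.11, so the gap between the
    # two revisions is removed below and they are emitted separately.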

    # Collect the gaps; they are later sorted so the largest can be popped
    # first
    gaps = []
    prevend = None
    for i, rev in enumerate(revs):
        revstart = start(rev)
        revlen = length(rev)

        # Skip empty revisions to form larger holes
        if revlen == 0:
            continue

        if prevend is not None:
            gapsize = revstart - prevend
            # only consider holes that are large enough
            if gapsize > mingapsize:
                gaps.append((gapsize, i))

        prevend = revstart + revlen
    # sort the gaps to pop them from largest to smallest
    gaps.sort()

    # Collect the indices of the largest holes until the density is acceptable
    selected = []
    while gaps and density < targetdensity:
        gapsize, gapidx = gaps.pop()

        selected.append(gapidx)

        # ``gaps`` is sorted in ascending order, so ``pop()`` returned the
        # largest remaining gap; skipping it reduces the data left to read
        readdata -= gapsize
        if readdata > 0:
            density = chainpayload / float(readdata)
        else:
            density = 1.0
    selected.sort()

    # Cut the revs at collected indices
    previdx = 0
    for idx in selected:

        chunk = _trimchunk(revlog, revs, previdx, idx)
        if chunk:
            yield chunk

        previdx = idx

    chunk = _trimchunk(revlog, revs, previdx)
    if chunk:
        yield chunk


def _trimchunk(revlog, revs, startidx, endidx=None):
    """returns revs[startidx:endidx] without empty trailing revs

    Doctest Setup
    >>> revlog = _testrevlog([
    ...     5,  #0
    ...     10, #1
    ...     12, #2
    ...     12, #3 (empty)
    ...     17, #4
    ...     21, #5
    ...     21, #6 (empty)
    ... ])

    Contiguous cases:
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
    [0, 1, 2, 3, 4, 5]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
    [0, 1, 2, 3, 4]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
    [0, 1, 2]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
    [2]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
    [3, 4, 5]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
    [3, 4]

    Discontiguous cases:
    >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
    [1, 3, 5]
    >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
    [1]
    >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
    [3, 5]
    >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
    [3, 5]
    """
    length = revlog.length

    if endidx is None:
        endidx = len(revs)

    # If we have a non-empty delta candidate, there is nothing to trim
    if revs[endidx - 1] < len(revlog):
        # Trim empty revs at the end, except the very first revision of a chain
        while (
            endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0
        ):
            endidx -= 1

    return revs[startidx:endidx]


def segmentspan(revlog, revs):
    """Get the byte span of a segment of revisions

    revs is a sorted array of revision numbers

    >>> revlog = _testrevlog([
    ...     5,  #0
    ...     10, #1
    ...     12, #2
    ...     12, #3 (empty)
    ...     17, #4
    ... ])

    >>> segmentspan(revlog, [0, 1, 2, 3, 4])
    17
    >>> segmentspan(revlog, [0, 4])
    17
    >>> segmentspan(revlog, [3, 4])
    5
    >>> segmentspan(revlog, [1, 2, 3,])
    7
    >>> segmentspan(revlog, [1, 3])
    7
    """
    if not revs:
        return 0
    end = revlog.end(revs[-1])
    return end - revlog.start(revs[0])


def _textfromdelta(fh, revlog, baserev, delta, p1, p2, flags, expectednode):
    """build full text from a (base, delta) pair and other metadata"""
    # special case deltas which replace entire base; no need to decode
    # base revision. this neatly avoids censored bases, which throw when
    # they're decoded.
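    # (A full-replacement delta is recognizable from its header alone: a
    # single ">lll" hunk header -- 12 bytes, as computed below -- which
    # mdiff.replacediffheader builds to cover the entire base; everything
    # after that header is then the new full text.)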
    hlen = struct.calcsize(b">lll")
    if delta[:hlen] == mdiff.replacediffheader(
        revlog.rawsize(baserev), len(delta) - hlen
    ):
        fulltext = delta[hlen:]
    else:
        # deltabase is the rawtext before it was changed by flag processors,
        # which is equivalent to non-raw text
        basetext = revlog.revision(baserev, _df=fh)
        fulltext = mdiff.patch(basetext, delta)

    try:
        validatehash = flagutil.processflagsraw(revlog, fulltext, flags)
        if validatehash:
            revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
        if flags & REVIDX_ISCENSORED:
            raise error.StorageError(
                _(b'node %s is not censored') % expectednode
            )
    except error.CensoredNodeError:
        # must pass the censored index flag to add censored revisions
        if not flags & REVIDX_ISCENSORED:
            raise
    return fulltext


@attr.s(slots=True, frozen=True)
class _deltainfo:
    distance = attr.ib()
    deltalen = attr.ib()
    data = attr.ib()
    base = attr.ib()
    chainbase = attr.ib()
    chainlen = attr.ib()
    compresseddeltalen = attr.ib()
    snapshotdepth = attr.ib()


def drop_u_compression(delta):
    """turn a "u" (no-compression) delta into a no-compression delta without
    its header

    This is useful for revlog formats that have a better compression method.
    """
    assert delta.data[0] == b'u', delta.data[0]
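    # delta.data is a (header, payload) pair; the one-byte b'u' header marks
    # data stored uncompressed. Replacing it with an empty header drops that
    # byte, hence the deltalen - 1 below.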
    return _deltainfo(
        delta.distance,
        delta.deltalen - 1,
        (b'', delta.data[1]),
        delta.base,
        delta.chainbase,
        delta.chainlen,
        delta.compresseddeltalen,
        delta.snapshotdepth,
    )


def is_good_delta_info(revlog, deltainfo, revinfo):
    """Returns True if the given delta is good. Good means that it is within
    the disk span, disk size, and chain length bounds that we know to be
    performant."""
    if deltainfo is None:
        return False

-    if (
-        revinfo.cachedelta is not None
-        and deltainfo.base == revinfo.cachedelta[0]
-        and revinfo.cachedelta[2] == DELTA_BASE_REUSE_FORCE
-    ):
-        return True
+    # the DELTA_BASE_REUSE_FORCE case should have been taken care of sooner,
+    # so we should never end up asking this question. The assert is a
+    # safeguard to detect anything that would be fishy in this regard.
+    assert (
+        revinfo.cachedelta is None
+        or revinfo.cachedelta[2] != DELTA_BASE_REUSE_FORCE
+        or not revlog._generaldelta
+    )
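    # Note: revinfo.cachedelta, when present, is indexed as a triple here:
    # [0] is the cached base revision and [2] the DELTA_BASE_REUSE_* policy
    # ([1] presumably holds the cached delta itself).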

    # - 'deltainfo.distance' is the distance from the base revision --
    #   bounding it limits the amount of I/O we need to do.
    # - 'deltainfo.compresseddeltalen' is the sum of the total size of
    #   deltas we need to apply -- bounding it limits the amount of CPU
    #   we consume.

    textlen = revinfo.textlen
    defaultmax = textlen * 4
    maxdist = revlog._maxdeltachainspan
    if not maxdist:
        maxdist = deltainfo.distance  # ensure the conditional passes
    maxdist = max(maxdist, defaultmax)

    # Bad delta from read span:
    #
    #   If the span of data read is larger than the maximum allowed.
    #
    #   In the sparse-revlog case, we rely on the associated "sparse reading"
    #   to avoid issues related to the span of data. In theory, it would be
    #   possible to build a pathological revlog where delta patterns would
    #   lead to too many reads. However, they do not happen in practice at
    #   all. So we skip the span check entirely.
    if not revlog._sparserevlog and maxdist < deltainfo.distance:
        return False

    # Bad delta from new delta size:
    #
    #   If the delta size is larger than the target text, storing the
    #   delta will be inefficient.
    if textlen < deltainfo.deltalen:
        return False

    # Bad delta from cumulated payload size:
    #
    #   If the sum of deltas gets larger than K * target text length.
    if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
        return False

    # Bad delta from chain length:
    #
    #   If the number of deltas in the chain gets too high.
    if revlog._maxchainlen and revlog._maxchainlen < deltainfo.chainlen:
        return False

    # bad delta from intermediate snapshot size limit
    #
    #   If an intermediate snapshot size is higher than the limit. The
    #   limit exists to prevent endless chains of intermediate deltas from
    #   being created.
    if (
        deltainfo.snapshotdepth is not None
        and (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen
    ):
        return False
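    # For instance (illustrative numbers): a snapshot at depth 3 of a
    # 4000-byte text must produce a delta smaller than 4000 >> 3 == 500
    # bytes, so each extra level of snapshot depth halves the size budget.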

    # bad delta if new intermediate snapshot is larger than the previous
    # snapshot
    if (
        deltainfo.snapshotdepth
        and revlog.length(deltainfo.base) < deltainfo.deltalen
    ):
        return False

    return True


# If a revision's full text is that much bigger than a base candidate full
# text's, it is very unlikely that it will produce a valid delta. We no longer
# consider these candidates.
LIMIT_BASE2TEXT = 500
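# For illustration: in the sparse-revlog case, _candidategroups below skips
# any base whose raw size is under textlen // LIMIT_BASE2TEXT, so for a 1 MB
# text, bases smaller than roughly 2 KB are never even tried.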


def _candidategroups(
    revlog,
    textlen,
    p1,
    p2,
    cachedelta,
    excluded_bases=None,
    target_rev=None,
    snapshot_cache=None,
):
    """Provides groups of revisions to be tested as delta bases

    This top level function focuses on emitting groups with unique and
    worthwhile content. See _rawgroups for details about the group order.
    """
    # should we try to build a delta?
    if not (len(revlog) and revlog._storedeltachains):
        yield None
        return

    if target_rev is None:
        target_rev = len(revlog)

    if not revlog._generaldelta:
        # before general delta, there is only one possible delta base
        yield (target_rev - 1,)
        yield None
        return

-    if (
-        cachedelta is not None
-        and nullrev == cachedelta[0]
-        and cachedelta[2] == DELTA_BASE_REUSE_FORCE
-    ):
-        # instruction are to forcibly do a full snapshot
-        yield None
-        return
+    # the DELTA_BASE_REUSE_FORCE case should have been taken care of sooner,
+    # so we should never end up asking this question. The assert is a
+    # safeguard to detect anything that would be fishy in this regard.
+    assert (
+        cachedelta is None
+        or cachedelta[2] != DELTA_BASE_REUSE_FORCE
+        or not revlog._generaldelta
+    )

    deltalength = revlog.length
    deltaparent = revlog.deltaparent
    sparse = revlog._sparserevlog
    good = None

    deltas_limit = textlen * LIMIT_DELTA2TEXT
    group_chunk_size = revlog._candidate_group_chunk_size

    tested = {nullrev}
    candidates = _refinedgroups(
        revlog,
        p1,
        p2,
        cachedelta,
        snapshot_cache=snapshot_cache,
    )
    while True:
        temptative = candidates.send(good)
        if temptative is None:
            break
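        # _refinedgroups is driven as a coroutine: send(good) reports which
        # candidate base (if any) produced an acceptable delta for the
        # previous group, letting the generator refine its next suggestions
        # around that revision; a None group signals exhaustion.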
        group = []
        for rev in temptative:
            # skip over empty deltas (no need to include them in a chain)
            while not (rev == nullrev or rev in tested or deltalength(rev)):
                tested.add(rev)
                rev = deltaparent(rev)
            # no need to try a delta against nullrev, this will be done as a
            # last resort.
            if rev == nullrev:
                continue
            # filter out revisions we tested already
            if rev in tested:
                continue

-            if (
-                cachedelta is not None
-                and rev == cachedelta[0]
-                and cachedelta[2] == DELTA_BASE_REUSE_FORCE
-            ):
-                # instructions are to forcibly consider/use this delta base
-                group.append(rev)
-                continue
-
            # a higher authority deemed the base unworthy (e.g. censored)
            if excluded_bases is not None and rev in excluded_bases:
                tested.add(rev)
                continue
            # We are in some recomputation cases and that rev is too high in
            # the revlog
            if target_rev is not None and rev >= target_rev:
                tested.add(rev)
                continue
            # filter out delta bases that will never produce a good delta
            if deltas_limit < revlog.length(rev):
                tested.add(rev)
                continue
            if sparse and revlog.rawsize(rev) < (textlen // LIMIT_BASE2TEXT):
                tested.add(rev)
                continue
            # no delta for rawtext-changing revs (see "candelta" for why)
            if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
                tested.add(rev)
                continue

            # If we reach here, we are about to build and test a delta.
            # The delta building process will compute the chaininfo in all
            # cases; since that computation is cached, it is fine to access
            # it here too.
            chainlen, chainsize = revlog._chaininfo(rev)
            # if chain will be too long, skip base
            if revlog._maxchainlen and chainlen >= revlog._maxchainlen:
                tested.add(rev)
                continue
            # if chain already has too much data, skip base
            if deltas_limit < chainsize:
                tested.add(rev)
                continue
            if sparse and revlog.upperboundcomp is not None:
                maxcomp = revlog.upperboundcomp
                basenotsnap = (p1, p2, nullrev)
                if rev not in basenotsnap and revlog.issnapshot(rev):
                    snapshotdepth = revlog.snapshotdepth(rev)
                    # If text is significantly larger than the base, we can
                    # expect the resulting delta to be proportional to the size
                    # difference
                    revsize = revlog.rawsize(rev)
                    rawsizedistance = max(textlen - revsize, 0)
                    # use an estimate of the compression upper bound.
                    lowestrealisticdeltalen = rawsizedistance // maxcomp

                    # check the absolute constraint on the delta size
                    snapshotlimit = textlen >> snapshotdepth
                    if snapshotlimit < lowestrealisticdeltalen:
                        # delta lower bound is larger than accepted upper bound
                        tested.add(rev)
                        continue

                    # check the relative constraint on the delta size
                    revlength = revlog.length(rev)
                    if revlength < lowestrealisticdeltalen:
                        # delta probable lower bound is larger than target base
                        tested.add(rev)
                        continue

            group.append(rev)
        if group:
            # When the size of the candidate group is big, it can result in a
            # quite significant performance impact. To reduce this, we can send
            # them in smaller batches until the new batch does not provide any
            # improvements.
            #
            # This might reduce the overall efficiency of the compression in
            # some corner cases, but that should also prevent very pathological
            # cases from being an issue (e.g. 20 000 candidates).
            #
            # XXX note that the ordering of the group becomes important as it
            # now impacts the final result. The current order is unprocessed
            # and can be improved.
            if group_chunk_size == 0:
                tested.update(group)
                good = yield tuple(group)
            else:
                prev_good = good
                for start in range(0, len(group), group_chunk_size):
                    sub_group = group[start : start + group_chunk_size]
                    tested.update(sub_group)
                    good = yield tuple(sub_group)
                    if prev_good == good:
                        break

    yield None


def _refinedgroups(revlog, p1, p2, cachedelta, snapshot_cache=None):
    good = None
    # First we try to reuse the delta contained in the bundle.
    # (or from the source revlog)
    #
    # This logic only applies to general delta repositories and can be
    # disabled through configuration. Disabling the reuse of source deltas is
    # useful when we want to make sure we recompute "optimal" deltas.
    debug_info = None
    if cachedelta is not None and cachedelta[2] > DELTA_BASE_REUSE_NO:
        # Assume what we received from the server is a good choice
        # build delta will reuse the cache
        if debug_info is not None:
            debug_info['cached-delta.tested'] += 1
        good = yield (cachedelta[0],)
        if good is not None:
            if debug_info is not None:
                debug_info['cached-delta.accepted'] += 1
            yield None
            return
    if snapshot_cache is None:
        snapshot_cache = SnapshotCache()
    groups = _rawgroups(
        revlog,
        p1,
        p2,
        cachedelta,
        snapshot_cache,
    )
    for candidates in groups:
        good = yield candidates
        if good is not None:
            break

    # If sparse revlog is enabled, we can try to refine the available deltas
    if not revlog._sparserevlog:
        yield None
        return

    # if we have a refinable value, try to refine it
    if good is not None and good not in (p1, p2) and revlog.issnapshot(good):
        # refine snapshot down
        previous = None
        while previous != good:
            previous = good
            base = revlog.deltaparent(good)
            if base == nullrev:
                break
            good = yield (base,)
        # refine snapshot up
        if not snapshot_cache.snapshots:
            snapshot_cache.update(revlog, good + 1)
        previous = None
        while good != previous:
            previous = good
            children = tuple(sorted(c for c in snapshot_cache.snapshots[good]))
            good = yield children
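        # Taken together (illustrative summary): starting from a good
        # snapshot, we first walk its delta parents toward nullrev looking
        # for a shallower base that still works, then walk its snapshot
        # children looking for a deeper one, stopping as soon as a round of
        # suggestions leaves ``good`` unchanged.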
895
888
896 if debug_info is not None:
889 if debug_info is not None:
897 if good is None:
890 if good is None:
898 debug_info['no-solution'] += 1
891 debug_info['no-solution'] += 1
899
892
900 yield None
893 yield None
901
894
902
895
def _rawgroups(revlog, p1, p2, cachedelta, snapshot_cache=None):
    """Provides groups of revisions to be tested as delta bases

    This lower level function focuses on emitting deltas that are
    theoretically interesting, without looking at any practical details.

    The group order aims at providing fast or small candidates first.
    """
    # Why search for a delta base if we cannot use a delta base?
    assert revlog._generaldelta
    # also see issue6056
    sparse = revlog._sparserevlog
    curr = len(revlog)
    prev = curr - 1
    deltachain = lambda rev: revlog._deltachain(rev)[0]

    # exclude already lazily tested bases, if any
    parents = [p for p in (p1, p2) if p != nullrev]

    if not revlog._deltabothparents and len(parents) == 2:
        parents.sort()
        # To minimize the chance of having to build a fulltext,
        # pick first whichever parent is closest to us (max rev)
        yield (parents[1],)
        # then the other one (min rev) if the first did not fit
        yield (parents[0],)
    elif len(parents) > 0:
        # Test all parents (1 or 2), and keep the best candidate
        yield parents

    if sparse and parents:
        if snapshot_cache is None:
            # map: base-rev: [snapshot-revs]
            snapshot_cache = SnapshotCache()
        # See if we can use an existing snapshot in the parent chains to use
        # as a base for a new intermediate-snapshot
        #
        # search for snapshots in the parents' delta chains
        # map: snapshot-level: snapshot-rev
        parents_snaps = collections.defaultdict(set)
        candidate_chains = [deltachain(p) for p in parents]
        for chain in candidate_chains:
            for idx, s in enumerate(chain):
                if not revlog.issnapshot(s):
                    break
                parents_snaps[idx].add(s)
        snapfloor = min(parents_snaps[0]) + 1
        snapshot_cache.update(revlog, snapfloor)
        # search for the highest "unrelated" revision
        #
        # Adding snapshots used by "unrelated" revisions increases the odds
        # that we reuse an independent, yet better, snapshot chain.
        #
        # XXX instead of building a set of revisions, we could lazily
        # enumerate over the chains. That would be more efficient, however we
        # stick to simple code for now.
        all_revs = set()
        for chain in candidate_chains:
            all_revs.update(chain)
        other = None
        for r in revlog.revs(prev, snapfloor):
            if r not in all_revs:
                other = r
                break
        if other is not None:
            # To avoid unfair competition, we won't use unrelated
            # intermediate snapshots that are deeper than the ones from the
            # parent delta chain.
            max_depth = max(parents_snaps.keys())
            chain = deltachain(other)
            for depth, s in enumerate(chain):
                if s < snapfloor:
                    continue
                if max_depth < depth:
                    break
                if not revlog.issnapshot(s):
                    break
                parents_snaps[depth].add(s)
        # Test them as possible intermediate snapshot bases.
        # We test them from the highest to the lowest level; high level ones
        # are more likely to result in small deltas.
        floor = None
        for idx, snaps in sorted(parents_snaps.items(), reverse=True):
            siblings = set()
            for s in snaps:
                siblings.update(snapshot_cache.snapshots[s])
            # Before considering making a new intermediate snapshot, we check
            # if an existing snapshot, child of the base we consider, would
            # be suitable.
            #
            # It gives a chance to reuse a delta chain "unrelated" to the
            # current revision instead of starting our own. Without such
            # re-use, topological branches would keep reopening new chains,
            # creating more and more snapshots as the repository grows.

            if floor is not None:
                # We only do this for siblings created after the one in our
                # parent's delta chain. Those created before have fewer
                # chances to be valid bases, since our ancestors had to
                # create a new snapshot.
                siblings = [r for r in siblings if floor < r]
            yield tuple(sorted(siblings))
            # then test the bases from our parents' delta chains.
            yield tuple(sorted(snaps))
            floor = min(snaps)
        # No suitable base found in the parent chains; search whether any
        # full snapshot emitted since the parents' bases would be a suitable
        # base for an intermediate snapshot.
        #
        # It gives a chance to reuse a delta chain unrelated to the current
        # revision instead of starting our own. Without such re-use,
        # topological branches would keep reopening new full chains, creating
        # more and more snapshots as the repository grows.
        full = [r for r in snapshot_cache.snapshots[nullrev] if snapfloor <= r]
        yield tuple(sorted(full))

    if not sparse:
        # other approaches failed, try against prev to hopefully save us a
        # fulltext.
        yield (prev,)


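# Editor's sketch (not part of the original module): a hedged illustration of
# the order in which ``_rawgroups`` yields candidate groups for a merge
# revision on a sparse revlog; ``rlog``, ``p1`` and ``p2`` are assumed to
# follow the API used above, and nothing here runs at import time.
#
#     groups = _rawgroups(rlog, p1, p2, cachedelta=None)
#     next(groups)   # (max(p1, p2),): the closest parent first
#     next(groups)   # (min(p1, p2),): the other parent as a fallback
#     ...            # then, per snapshot level from highest to lowest:
#                    # existing sibling snapshots, then the parents' own
#                    # chain snapshots; finally full snapshots >= snapfloor

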
class SnapshotCache:
    __slots__ = ('snapshots', '_start_rev', '_end_rev')

    def __init__(self):
        self.snapshots = collections.defaultdict(set)
        self._start_rev = None
        self._end_rev = None

    def update(self, revlog, start_rev=0):
        """find snapshots from start_rev to tip"""
        nb_revs = len(revlog)
        end_rev = nb_revs - 1
        if start_rev > end_rev:
            return  # range is empty

        if self._start_rev is None:
            assert self._end_rev is None
            self._update(revlog, start_rev, end_rev)
        elif not (self._start_rev <= start_rev and end_rev <= self._end_rev):
            if start_rev < self._start_rev:
                self._update(revlog, start_rev, self._start_rev - 1)
            if self._end_rev < end_rev:
                self._update(revlog, self._end_rev + 1, end_rev)

        if self._start_rev is None:
            assert self._end_rev is None
            self._end_rev = end_rev
            self._start_rev = start_rev
        else:
            self._start_rev = min(self._start_rev, start_rev)
            self._end_rev = max(self._end_rev, end_rev)
        assert self._start_rev <= self._end_rev, (
            self._start_rev,
            self._end_rev,
        )

    def _update(self, revlog, start_rev, end_rev):
        """internal method that actually does the content update"""
        assert self._start_rev is None or (
            start_rev < self._start_rev or start_rev > self._end_rev
        ), (self._start_rev, self._end_rev, start_rev, end_rev)
        assert self._start_rev is None or (
            end_rev < self._start_rev or end_rev > self._end_rev
        ), (self._start_rev, self._end_rev, start_rev, end_rev)
        cache = self.snapshots
        if util.safehasattr(revlog.index, 'findsnapshots'):
            revlog.index.findsnapshots(cache, start_rev, end_rev)
        else:
            deltaparent = revlog.deltaparent
            issnapshot = revlog.issnapshot
            for rev in revlog.revs(start_rev, end_rev):
                if issnapshot(rev):
                    cache[deltaparent(rev)].add(rev)


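# Editor's sketch (not part of the original module): typical SnapshotCache
# usage, assuming ``rlog`` is a revlog exposing the API used above.
#
#     cache = SnapshotCache()
#     cache.update(rlog)                # scan revision 0..tip once
#     full = cache.snapshots[nullrev]   # revs whose snapshot base is nullrev
#     kids = cache.snapshots[42]        # snapshots delta-parented on rev 42
#
# Later ``update`` calls only scan revisions falling outside the already
# covered [_start_rev, _end_rev] window, keeping repeated updates cheap.

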
class deltacomputer:
    def __init__(
        self,
        revlog,
        write_debug=None,
        debug_search=False,
        debug_info=None,
    ):
        self.revlog = revlog
        self._write_debug = write_debug
        if write_debug is None:
            self._debug_search = False
        else:
            self._debug_search = debug_search
        self._debug_info = debug_info
        self._snapshot_cache = SnapshotCache()

    @property
    def _gather_debug(self):
        return self._write_debug is not None or self._debug_info is not None

    def buildtext(self, revinfo, fh):
        """Builds a fulltext version of a revision

        revinfo: revisioninfo instance that contains all needed info
        fh: file handle to either the .i or the .d revlog file,
            depending on whether it is inlined or not
        """
        btext = revinfo.btext
        if btext[0] is not None:
            return btext[0]

        revlog = self.revlog
        cachedelta = revinfo.cachedelta
        baserev = cachedelta[0]
        delta = cachedelta[1]

        fulltext = btext[0] = _textfromdelta(
            fh,
            revlog,
            baserev,
            delta,
            revinfo.p1,
            revinfo.p2,
            revinfo.flags,
            revinfo.node,
        )
        return fulltext

    def _builddeltadiff(self, base, revinfo, fh):
        revlog = self.revlog
        t = self.buildtext(revinfo, fh)
        if revlog.iscensored(base):
            # deltas based on a censored revision must replace the
            # full content in one patch, so the delta works everywhere
            header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
            delta = header + t
        else:
            ptext = revlog.rawdata(base, _df=fh)
            delta = mdiff.textdiff(ptext, t)

        return delta

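    # Editor's note (not part of the original module): for a censored base,
    # ``mdiff.replacediffheader(oldlen, newlen)`` is assumed to encode a
    # single hunk replacing the whole range [0, oldlen) of the base text, so
    # the "delta" above is that header followed by the complete new fulltext
    # and can be applied without ever reading the censored content.
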
    def _builddeltainfo(self, revinfo, base, fh, target_rev=None):
        # can we use the cached delta?
        revlog = self.revlog
        chainbase = revlog.chainbase(base)
        if revlog._generaldelta:
            deltabase = base
        else:
            if target_rev is not None and base != target_rev - 1:
                msg = (
                    b'general delta cannot use delta for something else '
                    b'than `prev`: %d<-%d'
                )
                msg %= (base, target_rev)
                raise error.ProgrammingError(msg)
            deltabase = chainbase
        snapshotdepth = None
        if revlog._sparserevlog and deltabase == nullrev:
            snapshotdepth = 0
        elif revlog._sparserevlog and revlog.issnapshot(deltabase):
            # A delta chain should always be one full snapshot,
            # zero or more semi-snapshots, and zero or more deltas
            p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
            if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
                snapshotdepth = len(revlog._deltachain(deltabase)[0])
        delta = None
        if revinfo.cachedelta:
            cachebase = revinfo.cachedelta[0]
            # check if the diff still applies
            currentbase = cachebase
            while (
                currentbase != nullrev
                and currentbase != base
                and self.revlog.length(currentbase) == 0
            ):
                currentbase = self.revlog.deltaparent(currentbase)
            if self.revlog._lazydelta and currentbase == base:
                delta = revinfo.cachedelta[1]
        if delta is None:
            delta = self._builddeltadiff(base, revinfo, fh)
        if self._debug_search:
            msg = b"DBG-DELTAS-SEARCH: uncompressed-delta-size=%d\n"
            msg %= len(delta)
            self._write_debug(msg)
        # snapshotdepth needs to be neither None nor a 0-level snapshot
        if revlog.upperboundcomp is not None and snapshotdepth:
            lowestrealisticdeltalen = len(delta) // revlog.upperboundcomp
            snapshotlimit = revinfo.textlen >> snapshotdepth
            if self._debug_search:
                msg = b"DBG-DELTAS-SEARCH: projected-lower-size=%d\n"
                msg %= lowestrealisticdeltalen
                self._write_debug(msg)
            if snapshotlimit < lowestrealisticdeltalen:
                if self._debug_search:
                    msg = b"DBG-DELTAS-SEARCH: DISCARDED (snapshot limit)\n"
                    self._write_debug(msg)
                return None
            if revlog.length(base) < lowestrealisticdeltalen:
                if self._debug_search:
                    msg = b"DBG-DELTAS-SEARCH: DISCARDED (prev size)\n"
                    self._write_debug(msg)
                return None
        header, data = revlog.compress(delta)
        deltalen = len(header) + len(data)
        offset = revlog.end(len(revlog) - 1)
        dist = deltalen + offset - revlog.start(chainbase)
        chainlen, compresseddeltalen = revlog._chaininfo(base)
        chainlen += 1
        compresseddeltalen += deltalen

        return _deltainfo(
            dist,
            deltalen,
            (header, data),
            deltabase,
            chainbase,
            chainlen,
            compresseddeltalen,
            snapshotdepth,
        )

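    # Editor's note (not part of the original module): the ``upperboundcomp``
    # pruning above is plain arithmetic. Assuming, for illustration, an upper
    # bound of 10x compression and a snapshot depth of 2:
    #
    #     lowestrealisticdeltalen = len(delta) // 10   # best conceivable size
    #     snapshotlimit = revinfo.textlen >> 2         # textlen / 2**depth
    #
    # If even that best case exceeds the size budget for this snapshot level
    # (or the size of the base itself), the candidate is dropped before any
    # compression work is done.
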
    def _fullsnapshotinfo(self, fh, revinfo, curr):
        rawtext = self.buildtext(revinfo, fh)
        data = self.revlog.compress(rawtext)
        compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
        deltabase = chainbase = curr
        snapshotdepth = 0
        chainlen = 1

        return _deltainfo(
            dist,
            deltalen,
            data,
            deltabase,
            chainbase,
            chainlen,
            compresseddeltalen,
            snapshotdepth,
        )

    def finddeltainfo(self, revinfo, fh, excluded_bases=None, target_rev=None):
        """Find an acceptable delta against a candidate revision

        revinfo: information about the revision (instance of _revisioninfo)
        fh: file handle to either the .i or the .d revlog file,
            depending on whether it is inlined or not

        Returns the first acceptable candidate revision, as ordered by
        _candidategroups

        If no suitable deltabase is found, we return delta info for a full
        snapshot.

        `excluded_bases` is an optional set of revisions that cannot be used
        as a delta base. Use this to recompute deltas suitable in censor or
        strip contexts.
        """
        if target_rev is None:
            target_rev = len(self.revlog)

        gather_debug = self._gather_debug
        cachedelta = revinfo.cachedelta
        revlog = self.revlog
        p1r = p2r = None

        if excluded_bases is None:
            excluded_bases = set()

        if gather_debug:
            start = util.timer()
            dbg = self._one_dbg_data()
            dbg['revision'] = target_rev
            target_revlog = b"UNKNOWN"
            target_type = self.revlog.target[0]
            target_key = self.revlog.target[1]
            if target_type == KIND_CHANGELOG:
                target_revlog = b'CHANGELOG:'
            elif target_type == KIND_MANIFESTLOG:
                target_revlog = b'MANIFESTLOG:'
                if target_key:
                    target_revlog += b'%s:' % target_key
            elif target_type == KIND_FILELOG:
                target_revlog = b'FILELOG:'
                if target_key:
                    target_revlog += b'%s:' % target_key
            dbg['target-revlog'] = target_revlog
            p1r = revlog.rev(revinfo.p1)
            p2r = revlog.rev(revinfo.p2)
            if p1r != nullrev:
                p1_chain_len = revlog._chaininfo(p1r)[0]
            else:
                p1_chain_len = -1
            if p2r != nullrev:
                p2_chain_len = revlog._chaininfo(p2r)[0]
            else:
                p2_chain_len = -1
            dbg['p1-chain-len'] = p1_chain_len
            dbg['p2-chain-len'] = p2_chain_len

        # 1) if the revision is empty, no amount of delta can beat it
        #
        # 2) no delta for flag processor revisions (see "candelta" for why)
        # not calling candelta since only one revision needs testing, also to
        # avoid overhead fetching flags again.
        if not revinfo.textlen or revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
            deltainfo = self._fullsnapshotinfo(fh, revinfo, target_rev)
            if gather_debug:
                end = util.timer()
                dbg['duration'] = end - start
                dbg[
                    'delta-base'
                ] = deltainfo.base  # pytype: disable=attribute-error
                dbg['search_round_count'] = 0
                dbg['using-cached-base'] = False
                dbg['delta_try_count'] = 0
                dbg['type'] = b"full"
                dbg['snapshot-depth'] = 0
                self._dbg_process_data(dbg)
            return deltainfo

        deltainfo = None

        # If this source delta is to be forcibly reused, let us comply early.
        if (
            revlog._generaldelta
            and revinfo.cachedelta is not None
            and revinfo.cachedelta[2] == DELTA_BASE_REUSE_FORCE
        ):
            base = revinfo.cachedelta[0]
            if base == nullrev:
                dbg_type = b"full"
                deltainfo = self._fullsnapshotinfo(fh, revinfo, target_rev)
                if gather_debug:
                    snapshotdepth = 0
            elif base not in excluded_bases:
                delta = revinfo.cachedelta[1]
                header, data = revlog.compress(delta)
                deltalen = len(header) + len(data)
                if gather_debug:
                    offset = revlog.end(len(revlog) - 1)
                    chainbase = revlog.chainbase(base)
                    distance = deltalen + offset - revlog.start(chainbase)
                    chainlen, compresseddeltalen = revlog._chaininfo(base)
                    chainlen += 1
                    compresseddeltalen += deltalen
                    if base == p1r or base == p2r:
                        dbg_type = b"delta"
                        snapshotdepth = None
                    elif not revlog.issnapshot(base):
                        snapshotdepth = None
                    else:
                        dbg_type = b"snapshot"
                        snapshotdepth = revlog.snapshotdepth(base) + 1
                else:
                    distance = None
                    chainbase = None
                    chainlen = None
                    compresseddeltalen = None
                    snapshotdepth = None
                deltainfo = _deltainfo(
                    distance=distance,
                    deltalen=deltalen,
                    data=(header, data),
                    base=base,
                    chainbase=chainbase,
                    chainlen=chainlen,
                    compresseddeltalen=compresseddeltalen,
                    snapshotdepth=snapshotdepth,
                )

            if deltainfo is not None:
                if gather_debug:
                    end = util.timer()
                    dbg['duration'] = end - start
                    dbg[
                        'delta-base'
                    ] = deltainfo.base  # pytype: disable=attribute-error
                    dbg['search_round_count'] = 0
                    dbg['using-cached-base'] = True
                    dbg['delta_try_count'] = 0
                    dbg['type'] = b"full"
                    if snapshotdepth is None:
                        dbg['snapshot-depth'] = 0
                    else:
                        dbg['snapshot-depth'] = snapshotdepth
                    self._dbg_process_data(dbg)
                return deltainfo

        # count the number of different deltas we tried (for debug purposes)
        dbg_try_count = 0
        # count the number of "search rounds" we did (for debug purposes)
        dbg_try_rounds = 0
        dbg_type = b'unknown'

        if p1r is None:
            p1r = revlog.rev(revinfo.p1)
            p2r = revlog.rev(revinfo.p2)

        if self._debug_search:
            msg = b"DBG-DELTAS-SEARCH: SEARCH rev=%d\n"
            msg %= target_rev
            self._write_debug(msg)

        groups = _candidategroups(
            self.revlog,
            revinfo.textlen,
            p1r,
            p2r,
            cachedelta,
            excluded_bases,
            target_rev,
            snapshot_cache=self._snapshot_cache,
        )
        candidaterevs = next(groups)
        while candidaterevs is not None:
            dbg_try_rounds += 1
            if self._debug_search:
                prev = None
                if deltainfo is not None:
                    prev = deltainfo.base

                if (
                    cachedelta is not None
                    and len(candidaterevs) == 1
                    and cachedelta[0] in candidaterevs
                ):
                    round_type = b"cached-delta"
                elif p1r in candidaterevs or p2r in candidaterevs:
                    round_type = b"parents"
                elif prev is not None and all(c < prev for c in candidaterevs):
                    round_type = b"refine-down"
                elif prev is not None and all(c > prev for c in candidaterevs):
                    round_type = b"refine-up"
                else:
                    round_type = b"search-down"
                msg = b"DBG-DELTAS-SEARCH: ROUND #%d - %d candidates - %s\n"
                msg %= (dbg_try_rounds, len(candidaterevs), round_type)
                self._write_debug(msg)
            nominateddeltas = []
            if deltainfo is not None:
                if self._debug_search:
                    msg = (
                        b"DBG-DELTAS-SEARCH: CONTENDER: rev=%d - length=%d\n"
                    )
                    msg %= (deltainfo.base, deltainfo.deltalen)
                    self._write_debug(msg)
                # if we already found a good delta,
                # challenge it against refined candidates
                nominateddeltas.append(deltainfo)
            for candidaterev in candidaterevs:
                if self._debug_search:
                    msg = b"DBG-DELTAS-SEARCH: CANDIDATE: rev=%d\n"
                    msg %= candidaterev
                    self._write_debug(msg)
                    candidate_type = None
                    if candidaterev == p1r:
                        candidate_type = b"p1"
                    elif candidaterev == p2r:
                        candidate_type = b"p2"
                    elif self.revlog.issnapshot(candidaterev):
                        candidate_type = b"snapshot-%d"
                        candidate_type %= self.revlog.snapshotdepth(
                            candidaterev
                        )

                    if candidate_type is not None:
                        msg = b"DBG-DELTAS-SEARCH: type=%s\n"
                        msg %= candidate_type
                        self._write_debug(msg)
                    msg = b"DBG-DELTAS-SEARCH: size=%d\n"
                    msg %= self.revlog.length(candidaterev)
                    self._write_debug(msg)
                    msg = b"DBG-DELTAS-SEARCH: base=%d\n"
                    msg %= self.revlog.deltaparent(candidaterev)
                    self._write_debug(msg)

                dbg_try_count += 1

                if self._debug_search:
                    delta_start = util.timer()
                candidatedelta = self._builddeltainfo(
                    revinfo,
                    candidaterev,
                    fh,
                    target_rev=target_rev,
                )
                if self._debug_search:
                    delta_end = util.timer()
                    msg = b"DBG-DELTAS-SEARCH: delta-search-time=%f\n"
                    msg %= delta_end - delta_start
                    self._write_debug(msg)
                if candidatedelta is not None:
                    if is_good_delta_info(self.revlog, candidatedelta, revinfo):
                        if self._debug_search:
                            msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (GOOD)\n"
                            msg %= candidatedelta.deltalen
                            self._write_debug(msg)
                        nominateddeltas.append(candidatedelta)
                    elif self._debug_search:
                        msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (BAD)\n"
                        msg %= candidatedelta.deltalen
                        self._write_debug(msg)
                elif self._debug_search:
                    msg = b"DBG-DELTAS-SEARCH: NO-DELTA\n"
                    self._write_debug(msg)
            if nominateddeltas:
                deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
            if deltainfo is not None:
                candidaterevs = groups.send(deltainfo.base)
            else:
                candidaterevs = next(groups)

        if deltainfo is None:
            dbg_type = b"full"
            deltainfo = self._fullsnapshotinfo(fh, revinfo, target_rev)
        elif deltainfo.snapshotdepth:  # pytype: disable=attribute-error
            dbg_type = b"snapshot"
        else:
            dbg_type = b"delta"

        if gather_debug:
            end = util.timer()
            if dbg_type == b'full':
                used_cached = (
                    cachedelta is not None
                    and dbg_try_rounds == 0
                    and dbg_try_count == 0
                    and cachedelta[0] == nullrev
                )
            else:
                used_cached = (
                    cachedelta is not None
                    and dbg_try_rounds == 1
                    and dbg_try_count == 1
                    and deltainfo.base == cachedelta[0]
                )
            dbg['duration'] = end - start
            dbg[
                'delta-base'
            ] = deltainfo.base  # pytype: disable=attribute-error
            dbg['search_round_count'] = dbg_try_rounds
            dbg['using-cached-base'] = used_cached
            dbg['delta_try_count'] = dbg_try_count
            dbg['type'] = dbg_type
            if (
                deltainfo.snapshotdepth  # pytype: disable=attribute-error
                is not None
            ):
                dbg[
                    'snapshot-depth'
                ] = deltainfo.snapshotdepth  # pytype: disable=attribute-error
            else:
                dbg['snapshot-depth'] = 0
            self._dbg_process_data(dbg)
        return deltainfo

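    # Editor's sketch (not part of the original module): the refinement
    # protocol driven by ``finddeltainfo`` above, with a hypothetical
    # ``best_of`` helper standing in for the build-and-validate loop.
    #
    #     groups = _candidategroups(rlog, textlen, p1r, p2r, ...)
    #     candidaterevs = next(groups)        # first group of bases to try
    #     while candidaterevs is not None:
    #         deltainfo = best_of(candidaterevs, deltainfo)
    #         if deltainfo is not None:
    #             # refine the search around the current winner
    #             candidaterevs = groups.send(deltainfo.base)
    #         else:
    #             candidaterevs = next(groups)  # move to the next raw group
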
    def _one_dbg_data(self):
        return {
            'duration': None,
            'revision': None,
            'delta-base': None,
            'search_round_count': None,
            'using-cached-base': None,
            'delta_try_count': None,
            'type': None,
            'p1-chain-len': None,
            'p2-chain-len': None,
            'snapshot-depth': None,
            'target-revlog': None,
        }

    def _dbg_process_data(self, dbg):
        if self._debug_info is not None:
            self._debug_info.append(dbg)

        if self._write_debug is not None:
            msg = (
                b"DBG-DELTAS:"
                b" %-12s"
                b" rev=%d:"
                b" delta-base=%d"
                b" is-cached=%d"
                b" - search-rounds=%d"
                b" try-count=%d"
                b" - delta-type=%-6s"
                b" snap-depth=%d"
                b" - p1-chain-length=%d"
                b" p2-chain-length=%d"
                b" - duration=%f"
                b"\n"
            )
            msg %= (
                dbg["target-revlog"],
                dbg["revision"],
                dbg["delta-base"],
                dbg["using-cached-base"],
                dbg["search_round_count"],
                dbg["delta_try_count"],
                dbg["type"],
                dbg["snapshot-depth"],
                dbg["p1-chain-len"],
                dbg["p2-chain-len"],
                dbg["duration"],
            )
            self._write_debug(msg)


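# Editor's note (not part of the original module): with made-up values, the
# method above would emit a single line shaped like (wrapped for readability):
#
#     DBG-DELTAS: FILELOG:foo.txt: rev=5: delta-base=4 is-cached=0
#         - search-rounds=1 try-count=1 - delta-type=delta  snap-depth=0
#         - p1-chain-length=3 p2-chain-length=-1 - duration=0.000123

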
def delta_compression(default_compression_header, deltainfo):
    """return (COMPRESSION_MODE, deltainfo)

    used by revlog v2+ formats to dispatch between PLAIN and DEFAULT
    compression.
    """
    h, d = deltainfo.data
    compression_mode = COMP_MODE_INLINE
    if not h and not d:
        # no data to store at all... declare it uncompressed
        compression_mode = COMP_MODE_PLAIN
    elif not h:
        t = d[0:1]
        if t == b'\0':
            compression_mode = COMP_MODE_PLAIN
        elif t == default_compression_header:
            compression_mode = COMP_MODE_DEFAULT
    elif h == b'u':
        # we have a more efficient way to declare uncompressed
        h = b''
        compression_mode = COMP_MODE_PLAIN
        deltainfo = drop_u_compression(deltainfo)
    return compression_mode, deltainfo
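

# Editor's sketch (not part of the original module): how the stored header
# and first data byte map to a compression mode in ``delta_compression``,
# assuming for illustration a zlib-style default header of b'x'.
#
#     header  data        resulting mode
#     b''     b''         COMP_MODE_PLAIN    (nothing to store)
#     b''     b'\0...'    COMP_MODE_PLAIN    (stored uncompressed)
#     b''     b'x...'     COMP_MODE_DEFAULT  (matches the default engine)
#     b'u'    any         COMP_MODE_PLAIN    (b'u' marker dropped entirely)
#     anything else       COMP_MODE_INLINE   (header kept inline with data)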