##// END OF EJS Templates
delta-find: move the `gather_debug` logic in a property...
marmoute -
r51541:5d210ff4 stable
parent child Browse files
Show More
@@ -1,1530 +1,1532 b''
1 # revlogdeltas.py - Logic around delta computation for revlog
1 # revlogdeltas.py - Logic around delta computation for revlog
2 #
2 #
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 # Copyright 2018 Octobus <contact@octobus.net>
4 # Copyright 2018 Octobus <contact@octobus.net>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8 """Helper class to compute deltas stored inside revlogs"""
8 """Helper class to compute deltas stored inside revlogs"""
9
9
10
10
11 import collections
11 import collections
12 import struct
12 import struct
13
13
14 # import stuff from node for others to import from revlog
14 # import stuff from node for others to import from revlog
15 from ..node import nullrev
15 from ..node import nullrev
16 from ..i18n import _
16 from ..i18n import _
17 from ..pycompat import getattr
17 from ..pycompat import getattr
18
18
19 from .constants import (
19 from .constants import (
20 COMP_MODE_DEFAULT,
20 COMP_MODE_DEFAULT,
21 COMP_MODE_INLINE,
21 COMP_MODE_INLINE,
22 COMP_MODE_PLAIN,
22 COMP_MODE_PLAIN,
23 DELTA_BASE_REUSE_FORCE,
23 DELTA_BASE_REUSE_FORCE,
24 DELTA_BASE_REUSE_NO,
24 DELTA_BASE_REUSE_NO,
25 KIND_CHANGELOG,
25 KIND_CHANGELOG,
26 KIND_FILELOG,
26 KIND_FILELOG,
27 KIND_MANIFESTLOG,
27 KIND_MANIFESTLOG,
28 REVIDX_ISCENSORED,
28 REVIDX_ISCENSORED,
29 REVIDX_RAWTEXT_CHANGING_FLAGS,
29 REVIDX_RAWTEXT_CHANGING_FLAGS,
30 )
30 )
31
31
32 from ..thirdparty import attr
32 from ..thirdparty import attr
33
33
34 from .. import (
34 from .. import (
35 error,
35 error,
36 mdiff,
36 mdiff,
37 util,
37 util,
38 )
38 )
39
39
40 from . import flagutil
40 from . import flagutil
41
41
42 # maximum <delta-chain-data>/<revision-text-length> ratio
42 # maximum <delta-chain-data>/<revision-text-length> ratio
43 LIMIT_DELTA2TEXT = 2
43 LIMIT_DELTA2TEXT = 2
44
44
45
45
class _testrevlog:
    """minimalist fake revlog to use in doctests"""

    def __init__(self, data, density=0.5, mingap=0, snapshot=()):
        """data is an list of revision payload boundaries"""
        # cumulative end offsets, one per revision
        self._data = data
        self._srdensitythreshold = density
        self._srmingapsize = mingap
        self._snapshot = set(snapshot)
        self.index = None

    def start(self, rev):
        # both nullrev and the first revision start at offset zero
        if rev == nullrev or rev == 0:
            return 0
        return self._data[rev - 1]

    def end(self, rev):
        if rev == nullrev:
            return 0
        return self._data[rev]

    def length(self, rev):
        return self.end(rev) - self.start(rev)

    def __len__(self):
        return len(self._data)

    def issnapshot(self, rev):
        # nullrev is always considered a snapshot
        return rev == nullrev or rev in self._snapshot
79
79
80
80
def slicechunk(revlog, revs, targetsize=None):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `revlog._srdensitythreshold`. No gap smaller than
    `revlog._srmingapsize` is skipped.

    If `targetsize` is set, no chunk larger than `targetsize` will be yield.
    For consistency with other slicing choice, this limit won't go lower than
    `revlog._srmingapsize`.

    If individual revisions chunk are larger than this limit, they will still
    be raised individually.

    >>> data = [
    ...  5,  #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ]
    >>> revlog = _testrevlog(data, snapshot=range(16))

    >>> list(slicechunk(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(slicechunk(revlog, [0, 15]))
    [[0], [15]]
    >>> list(slicechunk(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]

    Slicing with a maximum chunk size
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
    [[0], [11], [13], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
    [[0], [11], [13, 15]]

    Slicing involving nullrev
    >>> list(slicechunk(revlog, [-1, 0, 11, 13, 15], targetsize=20))
    [[-1, 0], [11], [13, 15]]
    >>> list(slicechunk(revlog, [-1, 13, 15], targetsize=5))
    [[-1], [13], [15]]
    """
    if targetsize is not None:
        targetsize = max(targetsize, revlog._srmingapsize)
    # targetsize should not be specified when evaluating delta candidates:
    # * targetsize is used to ensure we stay within specification when reading,
    densityslicing = getattr(revlog.index, 'slicechunktodensity', None)
    if densityslicing is None:
        # fall back to the pure-python density slicing when the index does not
        # provide a native implementation
        densityslicing = lambda x, y, z: _slicechunktodensity(revlog, x, y, z)
    density_chunks = densityslicing(
        revs, revlog._srdensitythreshold, revlog._srmingapsize
    )
    for chunk in density_chunks:
        # each density-selected chunk may still exceed the read target size
        yield from _slicechunktosize(revlog, chunk, targetsize)
153
153
154
154
def _slicechunktosize(revlog, revs, targetsize=None):
    """slice revs to match the target size

    This is intended to be used on chunk that density slicing selected by that
    are still too large compared to the read garantee of revlog. This might
    happens when "minimal gap size" interrupted the slicing or when chain are
    built in a way that create large blocks next to each other.

    >>> data = [
    ...  3,  #0 (3)
    ...  5,  #1 (2)
    ...  6,  #2 (1)
    ...  8,  #3 (2)
    ...  8,  #4 (empty)
    ...  11, #5 (3)
    ...  12, #6 (1)
    ...  13, #7 (1)
    ...  14, #8 (1)
    ... ]

    == All snapshots cases ==
    >>> revlog = _testrevlog(data, snapshot=range(9))

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1, 2], [3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]

    == No Snapshot cases ==
    >>> revlog = _testrevlog(data)

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1], [2, 3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [4, 5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]

    == mixed case ==
    >>> revlog = _testrevlog(data, snapshot=[0, 1, 2])
    >>> list(_slicechunktosize(revlog, list(range(9)), 5))
    [[0, 1], [2], [3, 4, 5], [6, 7, 8]]
    """
    assert targetsize is None or 0 <= targetsize
    startdata = revlog.start(revs[0])
    enddata = revlog.end(revs[-1])
    fullspan = enddata - startdata
    # nothing to do when the whole span already fits the target
    if targetsize is None or fullspan <= targetsize:
        yield revs
        return

    startrevidx = 0
    endrevidx = 1
    iterrevs = enumerate(revs)
    next(iterrevs)  # skip first rev.
    # first step: get snapshots out of the way
    for idx, r in iterrevs:
        span = revlog.end(r) - startdata
        snapshot = revlog.issnapshot(r)
        if span <= targetsize and snapshot:
            # still fits; extend the current chunk
            endrevidx = idx + 1
        else:
            # flush the current chunk and start a new one at this revision
            chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
            if chunk:
                yield chunk
            startrevidx = idx
            startdata = revlog.start(r)
            endrevidx = idx + 1
        if not snapshot:
            break

    # for the others, we use binary slicing to quickly converge toward valid
    # chunks (otherwise, we might end up looking for start/end of many
    # revisions). This logic is not looking for the perfect slicing point, it
    # focuses on quickly converging toward valid chunks.
    nbitem = len(revs)
    while (enddata - startdata) > targetsize:
        endrevidx = nbitem
        if nbitem - startrevidx <= 1:
            break  # protect against individual chunk larger than limit
        localenddata = revlog.end(revs[endrevidx - 1])
        span = localenddata - startdata
        while span > targetsize:
            if endrevidx - startrevidx <= 1:
                break  # protect against individual chunk larger than limit
            # halve the distance to the chunk start until the span fits
            endrevidx -= (endrevidx - startrevidx) // 2
            localenddata = revlog.end(revs[endrevidx - 1])
            span = localenddata - startdata
        chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
        if chunk:
            yield chunk
        startrevidx = endrevidx
        startdata = revlog.start(revs[startrevidx])

    # emit whatever remains after the last cut
    chunk = _trimchunk(revlog, revs, startrevidx)
    if chunk:
        yield chunk
308
308
309
309
def _slicechunktodensity(revlog, revs, targetdensity=0.5, mingapsize=0):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
    skipped.

    >>> revlog = _testrevlog([
    ...  5,  #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ])

    >>> list(_slicechunktodensity(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(_slicechunktodensity(revlog, [0, 15]))
    [[0], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           mingapsize=20))
    [[1, 2, 3, 5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95))
    [[1, 2], [5], [8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95, mingapsize=12))
    [[1, 2], [5, 8, 10, 11], [14]]
    """
    # hoist the bound methods out of the loop below
    start = revlog.start
    length = revlog.length

    # a single revision can never be split
    if len(revs) <= 1:
        yield revs
        return

    deltachainspan = segmentspan(revlog, revs)

    if deltachainspan < mingapsize:
        yield revs
        return

    readdata = deltachainspan
    chainpayload = sum(length(r) for r in revs)

    if deltachainspan:
        density = chainpayload / float(deltachainspan)
    else:
        density = 1.0

    if density >= targetdensity:
        yield revs
        return

    # Store the gaps in a heap to have them sorted by decreasing size
    gaps = []
    prev_end = None
    for i, rev in enumerate(revs):
        rev_start = start(rev)
        rev_len = length(rev)

        # Skip empty revisions to form larger holes
        if rev_len == 0:
            continue

        if prev_end is not None:
            gap_size = rev_start - prev_end
            # only consider holes that are large enough
            if gap_size > mingapsize:
                gaps.append((gap_size, i))

        prev_end = rev_start + rev_len
    # sort the gaps to pop them from largest to small
    gaps.sort()

    # Collect the indices of the largest holes until the density is acceptable
    selected = []
    while gaps and density < targetdensity:
        gap_size, gap_idx = gaps.pop()

        selected.append(gap_idx)

        # skipping the gap shrinks the amount of data actually read
        readdata -= gap_size
        if readdata > 0:
            density = chainpayload / float(readdata)
        else:
            density = 1.0
    selected.sort()

    # Cut the revs at collected indices
    prev_idx = 0
    for idx in selected:
        chunk = _trimchunk(revlog, revs, prev_idx, idx)
        if chunk:
            yield chunk
        prev_idx = idx

    # trailing chunk after the last selected cut point
    chunk = _trimchunk(revlog, revs, prev_idx)
    if chunk:
        yield chunk
434
434
435
435
436 def _trimchunk(revlog, revs, startidx, endidx=None):
436 def _trimchunk(revlog, revs, startidx, endidx=None):
437 """returns revs[startidx:endidx] without empty trailing revs
437 """returns revs[startidx:endidx] without empty trailing revs
438
438
439 Doctest Setup
439 Doctest Setup
440 >>> revlog = _testrevlog([
440 >>> revlog = _testrevlog([
441 ... 5, #0
441 ... 5, #0
442 ... 10, #1
442 ... 10, #1
443 ... 12, #2
443 ... 12, #2
444 ... 12, #3 (empty)
444 ... 12, #3 (empty)
445 ... 17, #4
445 ... 17, #4
446 ... 21, #5
446 ... 21, #5
447 ... 21, #6 (empty)
447 ... 21, #6 (empty)
448 ... ])
448 ... ])
449
449
450 Contiguous cases:
450 Contiguous cases:
451 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
451 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
452 [0, 1, 2, 3, 4, 5]
452 [0, 1, 2, 3, 4, 5]
453 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
453 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
454 [0, 1, 2, 3, 4]
454 [0, 1, 2, 3, 4]
455 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
455 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
456 [0, 1, 2]
456 [0, 1, 2]
457 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
457 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
458 [2]
458 [2]
459 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
459 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
460 [3, 4, 5]
460 [3, 4, 5]
461 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
461 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
462 [3, 4]
462 [3, 4]
463
463
464 Discontiguous cases:
464 Discontiguous cases:
465 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
465 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
466 [1, 3, 5]
466 [1, 3, 5]
467 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
467 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
468 [1]
468 [1]
469 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
469 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
470 [3, 5]
470 [3, 5]
471 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
471 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
472 [3, 5]
472 [3, 5]
473 """
473 """
474 length = revlog.length
474 length = revlog.length
475
475
476 if endidx is None:
476 if endidx is None:
477 endidx = len(revs)
477 endidx = len(revs)
478
478
479 # If we have a non-emtpy delta candidate, there are nothing to trim
479 # If we have a non-emtpy delta candidate, there are nothing to trim
480 if revs[endidx - 1] < len(revlog):
480 if revs[endidx - 1] < len(revlog):
481 # Trim empty revs at the end, except the very first revision of a chain
481 # Trim empty revs at the end, except the very first revision of a chain
482 while (
482 while (
483 endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0
483 endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0
484 ):
484 ):
485 endidx -= 1
485 endidx -= 1
486
486
487 return revs[startidx:endidx]
487 return revs[startidx:endidx]
488
488
489
489
def segmentspan(revlog, revs):
    """Get the byte span of a segment of revisions

    revs is a sorted array of revision numbers

    >>> revlog = _testrevlog([
    ...  5,  #0
    ...  10, #1
    ...  12, #2
    ...  12, #3 (empty)
    ...  17, #4
    ... ])

    >>> segmentspan(revlog, [0, 1, 2, 3, 4])
    17
    >>> segmentspan(revlog, [0, 4])
    17
    >>> segmentspan(revlog, [3, 4])
    5
    >>> segmentspan(revlog, [1, 2, 3,])
    7
    >>> segmentspan(revlog, [1, 3])
    7
    """
    # an empty segment spans no bytes at all
    if not revs:
        return 0
    # distance from the first byte of the first rev to the last byte of the
    # last rev
    return revlog.end(revs[-1]) - revlog.start(revs[0])
518
518
519
519
def _textfromdelta(fh, revlog, baserev, delta, p1, p2, flags, expectednode):
    """build full text from a (base, delta) pair and other metadata"""
    # special case deltas which replace entire base; no need to decode
    # base revision. this neatly avoids censored bases, which throw when
    # they're decoded.
    hlen = struct.calcsize(b">lll")
    replace_header = mdiff.replacediffheader(
        revlog.rawsize(baserev), len(delta) - hlen
    )
    if delta[:hlen] == replace_header:
        fulltext = delta[hlen:]
    else:
        # deltabase is rawtext before changed by flag processors, which is
        # equivalent to non-raw text
        basetext = revlog.revision(baserev, _df=fh)
        fulltext = mdiff.patch(basetext, delta)

    try:
        validatehash = flagutil.processflagsraw(revlog, fulltext, flags)
        if validatehash:
            revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
        if flags & REVIDX_ISCENSORED:
            raise error.StorageError(
                _(b'node %s is not censored') % expectednode
            )
    except error.CensoredNodeError:
        # must pass the censored index flag to add censored revisions
        if not flags & REVIDX_ISCENSORED:
            raise
    return fulltext
549
549
550
550
@attr.s(slots=True, frozen=True)
class _deltainfo:
    """Immutable record describing a candidate delta for a revision."""

    # distance (in bytes) from the base revision on disk; bounding it
    # limits the amount of I/O needed to rebuild the text
    distance = attr.ib()
    # length of the (possibly compressed) delta payload, header included
    deltalen = attr.ib()
    # 2-tuple of (compression header, delta payload) -- see
    # drop_u_compression, which inspects data[0] and data[1]
    data = attr.ib()
    # revision number used as the delta base
    base = attr.ib()
    # first revision of the delta chain this delta extends
    chainbase = attr.ib()
    # number of deltas in the resulting chain
    chainlen = attr.ib()
    # cumulated size of all deltas that must be applied to rebuild the text
    compresseddeltalen = attr.ib()
    # snapshot depth if this revision would be stored as an intermediate
    # snapshot, None otherwise
    snapshotdepth = attr.ib()
561
561
562
562
def drop_u_compression(delta):
    """Turn a "u" (no-compression) delta into a headerless no-compression one.

    This is useful for revlog formats that have a better compression method.
    The input ``delta`` must carry the b'u' compression header; the returned
    ``_deltainfo`` is identical except the header byte is dropped from both
    the data tuple and the recorded delta length.
    """
    assert delta.data[0] == b'u', delta.data[0]
    return _deltainfo(
        delta.distance,
        delta.deltalen - 1,
        (b'', delta.data[1]),
        delta.base,
        delta.chainbase,
        delta.chainlen,
        delta.compresseddeltalen,
        delta.snapshotdepth,
    )
579
579
580
580
def is_good_delta_info(revlog, deltainfo, revinfo):
    """Returns True if the given delta is good.

    Good means that it is within the disk span, disk size, and chain length
    bounds that we know to be performant. A ``None`` deltainfo is never good.
    A delta whose base was forced by the caller (DELTA_BASE_REUSE_FORCE) is
    always accepted.
    """
    if deltainfo is None:
        return False

    # the caller forcibly requested this exact base: accept unconditionally
    if (
        revinfo.cachedelta is not None
        and deltainfo.base == revinfo.cachedelta[0]
        and revinfo.cachedelta[2] == DELTA_BASE_REUSE_FORCE
    ):
        return True

    # - 'deltainfo.distance' is the distance from the base revision --
    #   bounding it limits the amount of I/O we need to do.
    # - 'deltainfo.compresseddeltalen' is the sum of the total size of
    #   deltas we need to apply -- bounding it limits the amount of CPU
    #   we consume.

    textlen = revinfo.textlen
    defaultmax = textlen * 4
    maxdist = revlog._maxdeltachainspan
    if not maxdist:
        maxdist = deltainfo.distance  # ensure the conditional pass
    maxdist = max(maxdist, defaultmax)

    # Bad delta from read span:
    #
    #   If the span of data read is larger than the maximum allowed.
    #
    #   In the sparse-revlog case, we rely on the associated "sparse reading"
    #   to avoid issue related to the span of data. In theory, it would be
    #   possible to build pathological revlog where delta pattern would lead
    #   to too many reads. However, they do not happen in practice at all. So
    #   we skip the span check entirely.
    if not revlog._sparserevlog and maxdist < deltainfo.distance:
        return False

    # Bad delta from new delta size:
    #
    #   If the delta size is larger than the target text, storing the
    #   delta will be inefficient.
    if textlen < deltainfo.deltalen:
        return False

    # Bad delta from cumulated payload size:
    #
    #   If the sum of delta get larger than K * target text length.
    if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
        return False

    # Bad delta from chain length:
    #
    #   If the number of delta in the chain gets too high.
    if revlog._maxchainlen and revlog._maxchainlen < deltainfo.chainlen:
        return False

    # bad delta from intermediate snapshot size limit
    #
    #   If an intermediate snapshot size is higher than the limit.  The
    #   limit exist to prevent endless chain of intermediate delta to be
    #   created.
    if (
        deltainfo.snapshotdepth is not None
        and (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen
    ):
        return False

    # bad delta if new intermediate snapshot is larger than the previous
    # snapshot
    if (
        deltainfo.snapshotdepth
        and revlog.length(deltainfo.base) < deltainfo.deltalen
    ):
        return False

    return True
659
659
660
660
# If a revision's full text is that much bigger than a base candidate full
# text's, it is very unlikely that it will produce a valid delta. We no longer
# consider these candidates.
LIMIT_BASE2TEXT = 500
665
665
666
666
def _candidategroups(
    revlog,
    textlen,
    p1,
    p2,
    cachedelta,
    excluded_bases=None,
    target_rev=None,
    snapshot_cache=None,
):
    """Provides group of revision to be tested as delta base

    This top level function focus on emitting groups with unique and
    worthwhile content. See _raw_candidate_groups for details about the group
    order.

    This is a generator used as a coroutine: each yielded tuple is a group of
    candidate base revisions, and the caller sends back the chosen "good"
    base (or None) which drives refinement. A final ``None`` is yielded when
    no candidates remain.
    """
    # should we try to build a delta?
    if not (len(revlog) and revlog._storedeltachains):
        yield None
        return

    if target_rev is None:
        target_rev = len(revlog)

    if not revlog._generaldelta:
        # before general delta, there is only one possible delta base
        yield (target_rev - 1,)
        yield None
        return

    if (
        cachedelta is not None
        and nullrev == cachedelta[0]
        and cachedelta[2] == DELTA_BASE_REUSE_FORCE
    ):
        # instructions are to forcibly do a full snapshot
        yield None
        return

    deltalength = revlog.length
    deltaparent = revlog.deltaparent
    sparse = revlog._sparserevlog
    good = None

    deltas_limit = textlen * LIMIT_DELTA2TEXT
    group_chunk_size = revlog._candidate_group_chunk_size

    tested = {nullrev}
    candidates = _refinedgroups(
        revlog,
        p1,
        p2,
        cachedelta,
        snapshot_cache=snapshot_cache,
    )
    while True:
        temptative = candidates.send(good)
        if temptative is None:
            break
        group = []
        for rev in temptative:
            # skip over empty delta (no need to include them in a chain)
            while not (rev == nullrev or rev in tested or deltalength(rev)):
                tested.add(rev)
                rev = deltaparent(rev)
            # no need to try a delta against nullrev, this will be done as a
            # last resort.
            if rev == nullrev:
                continue
            # filter out revision we tested already
            if rev in tested:
                continue

            if (
                cachedelta is not None
                and rev == cachedelta[0]
                and cachedelta[2] == DELTA_BASE_REUSE_FORCE
            ):
                # instructions are to forcibly consider/use this delta base
                group.append(rev)
                continue

            # a higher authority deemed the base unworthy (e.g. censored)
            if excluded_bases is not None and rev in excluded_bases:
                tested.add(rev)
                continue
            # We are in some recomputation cases and that rev is too high in
            # the revlog
            if target_rev is not None and rev >= target_rev:
                tested.add(rev)
                continue
            # filter out delta base that will never produce good delta
            if deltas_limit < revlog.length(rev):
                tested.add(rev)
                continue
            if sparse and revlog.rawsize(rev) < (textlen // LIMIT_BASE2TEXT):
                tested.add(rev)
                continue
            # no delta for rawtext-changing revs (see "candelta" for why)
            if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
                tested.add(rev)
                continue

            # If we reach here, we are about to build and test a delta.
            # The delta building process will compute the chaininfo in all
            # case, since that computation is cached, it is fine to access it
            # here too.
            chainlen, chainsize = revlog._chaininfo(rev)
            # if chain will be too long, skip base
            if revlog._maxchainlen and chainlen >= revlog._maxchainlen:
                tested.add(rev)
                continue
            # if chain already have too much data, skip base
            if deltas_limit < chainsize:
                tested.add(rev)
                continue
            if sparse and revlog.upperboundcomp is not None:
                maxcomp = revlog.upperboundcomp
                basenotsnap = (p1, p2, nullrev)
                if rev not in basenotsnap and revlog.issnapshot(rev):
                    snapshotdepth = revlog.snapshotdepth(rev)
                    # If text is significantly larger than the base, we can
                    # expect the resulting delta to be proportional to the size
                    # difference
                    revsize = revlog.rawsize(rev)
                    rawsizedistance = max(textlen - revsize, 0)
                    # use an estimate of the compression upper bound.
                    lowestrealisticdeltalen = rawsizedistance // maxcomp

                    # check the absolute constraint on the delta size
                    snapshotlimit = textlen >> snapshotdepth
                    if snapshotlimit < lowestrealisticdeltalen:
                        # delta lower bound is larger than accepted upper bound
                        tested.add(rev)
                        continue

                    # check the relative constraint on the delta size
                    revlength = revlog.length(rev)
                    if revlength < lowestrealisticdeltalen:
                        # delta probable lower bound is larger than target base
                        tested.add(rev)
                        continue

            group.append(rev)
        if group:
            # When the size of the candidate group is big, it can result in a
            # quite significant performance impact. To reduce this, we can send
            # them in smaller batches until the new batch does not provide any
            # improvements.
            #
            # This might reduce the overall efficiency of the compression in
            # some corner cases, but that should also prevent very pathological
            # cases from being an issue. (eg. 20 000 candidates).
            #
            # XXX note that the ordering of the group becomes important as it
            # now impacts the final result. The current order is unprocessed
            # and can be improved.
            if group_chunk_size == 0:
                tested.update(group)
                good = yield tuple(group)
            else:
                prev_good = good
                for start in range(0, len(group), group_chunk_size):
                    sub_group = group[start : start + group_chunk_size]
                    tested.update(sub_group)
                    good = yield tuple(sub_group)
                    if prev_good == good:
                        break

    yield None
836
836
837
837
def _refinedgroups(revlog, p1, p2, cachedelta, snapshot_cache=None):
    """Yield successively refined groups of candidate delta bases.

    Used as a coroutine: each ``yield`` emits a tuple of candidate revisions
    and receives back the "good" base picked by the caller (or None). The
    cached delta (if reuse is allowed) is tried first, then the raw groups
    from ``_rawgroups``; on sparse revlogs a chosen snapshot base is further
    refined down its delta chain and up through its snapshot children.
    A final ``None`` is yielded when refinement is exhausted.
    """
    good = None
    # First we try to reuse the delta contained in the bundle.
    # (or from the source revlog)
    #
    # This logic only applies to general delta repositories and can be
    # disabled through configuration. Disabling reuse source delta is useful
    # when we want to make sure we recomputed "optimal" deltas.
    debug_info = None
    if cachedelta is not None and cachedelta[2] > DELTA_BASE_REUSE_NO:
        # Assume what we received from the server is a good choice
        # build delta will reuse the cache
        if debug_info is not None:
            debug_info['cached-delta.tested'] += 1
        good = yield (cachedelta[0],)
        if good is not None:
            if debug_info is not None:
                debug_info['cached-delta.accepted'] += 1
            yield None
            return
    if snapshot_cache is None:
        snapshot_cache = SnapshotCache()
    groups = _rawgroups(
        revlog,
        p1,
        p2,
        cachedelta,
        snapshot_cache,
    )
    for candidates in groups:
        good = yield candidates
        if good is not None:
            break

    # If sparse revlog is enabled, we can try to refine the available deltas
    if not revlog._sparserevlog:
        yield None
        return

    # if we have a refinable value, try to refine it
    if good is not None and good not in (p1, p2) and revlog.issnapshot(good):
        # refine snapshot down
        previous = None
        while previous != good:
            previous = good
            base = revlog.deltaparent(good)
            if base == nullrev:
                break
            good = yield (base,)
        # refine snapshot up
        if not snapshot_cache.snapshots:
            snapshot_cache.update(revlog, good + 1)
        previous = None
        while good != previous:
            previous = good
            children = tuple(sorted(c for c in snapshot_cache.snapshots[good]))
            good = yield children

    if debug_info is not None:
        if good is None:
            debug_info['no-solution'] += 1

    yield None
901
901
902
902
def _rawgroups(revlog, p1, p2, cachedelta, snapshot_cache=None):
    """Provides group of revision to be tested as delta base

    This lower level function focus on emitting delta theoretically
    interesting without looking at any practical details.

    The group order aims at providing fast or small candidates first.
    """
    # Why search for delta base if we cannot use a delta base ?
    assert revlog._generaldelta
    # also see issue6056
    sparse = revlog._sparserevlog
    curr = len(revlog)
    prev = curr - 1
    deltachain = lambda rev: revlog._deltachain(rev)[0]

    # exclude already lazy tested base if any
    parents = [p for p in (p1, p2) if p != nullrev]

    if not revlog._deltabothparents and len(parents) == 2:
        parents.sort()
        # To minimize the chance of having to build a fulltext,
        # pick first whichever parent is closest to us (max rev)
        yield (parents[1],)
        # then the other one (min rev) if the first did not fit
        yield (parents[0],)
    elif len(parents) > 0:
        # Test all parents (1 or 2), and keep the best candidate
        yield parents

    if sparse and parents:
        if snapshot_cache is None:
            # map: base-rev: [snapshot-revs]
            snapshot_cache = SnapshotCache()
        # See if we can use an existing snapshot in the parent chains to use
        # as a base for a new intermediate-snapshot
        #
        # search for snapshot in parents delta chain
        # map: snapshot-level: snapshot-rev
        parents_snaps = collections.defaultdict(set)
        candidate_chains = [deltachain(p) for p in parents]
        for chain in candidate_chains:
            for idx, s in enumerate(chain):
                if not revlog.issnapshot(s):
                    break
                parents_snaps[idx].add(s)
        snapfloor = min(parents_snaps[0]) + 1
        snapshot_cache.update(revlog, snapfloor)
        # search for the highest "unrelated" revision
        #
        # Adding snapshots used by "unrelated" revision increase the odd we
        # reuse an independent, yet better snapshot chain.
        #
        # XXX instead of building a set of revisions, we could lazily
        # enumerate over the chains. That would be more efficient, however we
        # stick to simple code for now.
        all_revs = set()
        for chain in candidate_chains:
            all_revs.update(chain)
        other = None
        for r in revlog.revs(prev, snapfloor):
            if r not in all_revs:
                other = r
                break
        if other is not None:
            # To avoid unfair competition, we won't use unrelated intermediate
            # snapshot that are deeper than the ones from the parent delta
            # chain.
            max_depth = max(parents_snaps.keys())
            chain = deltachain(other)
            for depth, s in enumerate(chain):
                if s < snapfloor:
                    continue
                if max_depth < depth:
                    break
                if not revlog.issnapshot(s):
                    break
                parents_snaps[depth].add(s)
        # Test them as possible intermediate snapshot base
        # We test them from highest to lowest level. High level one are more
        # likely to result in small delta
        floor = None
        for idx, snaps in sorted(parents_snaps.items(), reverse=True):
            siblings = set()
            for s in snaps:
                siblings.update(snapshot_cache.snapshots[s])
            # Before considering making a new intermediate snapshot, we check
            # if an existing snapshot, children of base we consider, would be
            # suitable.
            #
            # It give a change to reuse a delta chain "unrelated" to the
            # current revision instead of starting our own. Without such
            # re-use, topological branches would keep reopening new chains.
            # Creating more and more snapshot as the repository grow.

            if floor is not None:
                # We only do this for siblings created after the one in our
                # parent's delta chain. Those created before has less chances
                # to be valid base since our ancestors had to create a new
                # snapshot.
                siblings = [r for r in siblings if floor < r]
            yield tuple(sorted(siblings))
            # then test the base from our parent's delta chain.
            yield tuple(sorted(snaps))
            floor = min(snaps)
        # No suitable base found in the parent chain, search if any full
        # snapshots emitted since parent's base would be a suitable base for
        # an intermediate snapshot.
        #
        # It give a chance to reuse a delta chain unrelated to the current
        # revisions instead of starting our own. Without such re-use,
        # topological branches would keep reopening new full chains. Creating
        # more and more snapshot as the repository grow.
        full = [r for r in snapshot_cache.snapshots[nullrev] if snapfloor <= r]
        yield tuple(sorted(full))

    if not sparse:
        # other approach failed try against prev to hopefully save us a
        # fulltext.
        yield (prev,)
1023
1023
1024
1024
class SnapshotCache:
    """Cache mapping a delta-base revision to the snapshot revisions above it.

    The cache also remembers the contiguous revision range it has already
    scanned (``_start_rev``..``_end_rev``, inclusive) so that repeated
    `update` calls only have to look at revisions not covered yet.
    """

    __slots__ = ('snapshots', '_start_rev', '_end_rev')

    def __init__(self):
        # base revision -> set of snapshot revisions built on that base
        self.snapshots = collections.defaultdict(set)
        # inclusive bounds of the range already scanned (None == never scanned)
        self._start_rev = None
        self._end_rev = None

    def update(self, revlog, start_rev=0):
        """find snapshots from start_rev to tip"""
        last_rev = len(revlog) - 1
        if start_rev > last_rev:
            return  # nothing to scan, requested range is empty

        if self._start_rev is None:
            # very first scan: cover the whole requested range
            assert self._end_rev is None
            self._update(revlog, start_rev, last_rev)
        elif not (self._start_rev <= start_rev and last_rev <= self._end_rev):
            # only scan the pieces that fall outside the covered range
            if start_rev < self._start_rev:
                self._update(revlog, start_rev, self._start_rev - 1)
            if self._end_rev < last_rev:
                self._update(revlog, self._end_rev + 1, last_rev)

        # widen the recorded bounds to include what we just scanned
        if self._start_rev is None:
            assert self._end_rev is None
            self._end_rev = last_rev
            self._start_rev = start_rev
        else:
            self._start_rev = min(self._start_rev, start_rev)
            self._end_rev = max(self._end_rev, last_rev)
        assert self._start_rev <= self._end_rev, (
            self._start_rev,
            self._end_rev,
        )

    def _update(self, revlog, start_rev, end_rev):
        """internal method that actually do update content"""
        # the requested range must not overlap the already-covered one
        assert self._start_rev is None or (
            start_rev < self._start_rev or start_rev > self._end_rev
        ), (self._start_rev, self._end_rev, start_rev, end_rev)
        assert self._start_rev is None or (
            end_rev < self._start_rev or end_rev > self._end_rev
        ), (self._start_rev, self._end_rev, start_rev, end_rev)
        found = self.snapshots
        if util.safehasattr(revlog.index, b'findsnapshots'):
            # delegate to the index implementation when one is available
            revlog.index.findsnapshots(found, start_rev, end_rev)
        else:
            get_delta_parent = revlog.deltaparent
            is_snapshot = revlog.issnapshot
            for rev in revlog.revs(start_rev, end_rev):
                if is_snapshot(rev):
                    found[get_delta_parent(rev)].add(rev)
1078
1078
1079
1079
class deltacomputer:
    """Compute and select deltas to store for new revlog revisions.

    Given information about an incoming revision, this class searches the
    candidate delta bases (parents, snapshots, cached delta, ...) and picks
    the smallest acceptable delta, falling back to a full snapshot when no
    suitable base is found.  Optional debug hooks record or print details
    about the search.
    """

    def __init__(
        self,
        revlog,
        write_debug=None,
        debug_search=False,
        debug_info=None,
    ):
        # revlog the computed deltas are destined for
        self.revlog = revlog
        # optional callable receiving debug byte-strings (DBG-DELTAS* lines)
        self._write_debug = write_debug
        # when True (and write_debug is set), emit per-round search details
        self._debug_search = debug_search
        # optional list collecting one debug dict per computed delta
        self._debug_info = debug_info
        # lazily-filled cache of known snapshots, shared across searches
        self._snapshot_cache = SnapshotCache()

    @property
    def _gather_debug(self):
        # debug data must be gathered whenever either debug sink is active
        return self._write_debug is not None or self._debug_info is not None

    def buildtext(self, revinfo, fh):
        """Builds a fulltext version of a revision

        revinfo: revisioninfo instance that contains all needed info
        fh:      file handle to either the .i or the .d revlog file,
                 depending on whether it is inlined or not
        """
        btext = revinfo.btext
        if btext[0] is not None:
            # fulltext already computed and cached on the revinfo
            return btext[0]

        revlog = self.revlog
        cachedelta = revinfo.cachedelta
        baserev = cachedelta[0]
        delta = cachedelta[1]

        # apply the cached delta on top of its base and memoize the result
        fulltext = btext[0] = _textfromdelta(
            fh,
            revlog,
            baserev,
            delta,
            revinfo.p1,
            revinfo.p2,
            revinfo.flags,
            revinfo.node,
        )
        return fulltext

    def _builddeltadiff(self, base, revinfo, fh):
        """Return the raw delta bytes of this revision against `base`."""
        revlog = self.revlog
        t = self.buildtext(revinfo, fh)
        if revlog.iscensored(base):
            # deltas based on a censored revision must replace the
            # full content in one patch, so delta works everywhere
            header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
            delta = header + t
        else:
            ptext = revlog.rawdata(base, _df=fh)
            delta = mdiff.textdiff(ptext, t)

        return delta

    def _builddeltainfo(self, revinfo, base, fh, target_rev=None):
        """Build a _deltainfo for this revision delta-ed against `base`.

        Returns None when the delta is discarded early (projected size
        makes it unusable for an intermediate snapshot).
        """
        # can we use the cached delta?
        revlog = self.revlog
        debug_search = self._write_debug is not None and self._debug_search
        chainbase = revlog.chainbase(base)
        if revlog._generaldelta:
            deltabase = base
        else:
            # without general delta, only a delta against `prev` is legal
            if target_rev is not None and base != target_rev - 1:
                msg = (
                    b'general delta cannot use delta for something else '
                    b'than `prev`: %d<-%d'
                )
                msg %= (base, target_rev)
                raise error.ProgrammingError(msg)
            deltabase = chainbase
        snapshotdepth = None
        if revlog._sparserevlog and deltabase == nullrev:
            snapshotdepth = 0
        elif revlog._sparserevlog and revlog.issnapshot(deltabase):
            # A delta chain should always be one full snapshot,
            # zero or more semi-snapshots, and zero or more deltas
            p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
            if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
                snapshotdepth = len(revlog._deltachain(deltabase)[0])
        delta = None
        if revinfo.cachedelta:
            cachebase = revinfo.cachedelta[0]
            # check if the diff still apply
            currentbase = cachebase
            while (
                currentbase != nullrev
                and currentbase != base
                and self.revlog.length(currentbase) == 0
            ):
                # skip over empty deltas in the chain to find the real base
                currentbase = self.revlog.deltaparent(currentbase)
            if self.revlog._lazydelta and currentbase == base:
                delta = revinfo.cachedelta[1]
        if delta is None:
            delta = self._builddeltadiff(base, revinfo, fh)
        if debug_search:
            msg = b"DBG-DELTAS-SEARCH: uncompressed-delta-size=%d\n"
            msg %= len(delta)
            self._write_debug(msg)
        # snapshotdept need to be neither None nor 0 level snapshot
        if revlog.upperboundcomp is not None and snapshotdepth:
            # cheap pre-check: can a best-case compression of this delta
            # still satisfy the intermediate-snapshot size constraints?
            lowestrealisticdeltalen = len(delta) // revlog.upperboundcomp
            snapshotlimit = revinfo.textlen >> snapshotdepth
            if debug_search:
                msg = b"DBG-DELTAS-SEARCH: projected-lower-size=%d\n"
                msg %= lowestrealisticdeltalen
                self._write_debug(msg)
            if snapshotlimit < lowestrealisticdeltalen:
                if debug_search:
                    msg = b"DBG-DELTAS-SEARCH: DISCARDED (snapshot limit)\n"
                    self._write_debug(msg)
                return None
            if revlog.length(base) < lowestrealisticdeltalen:
                if debug_search:
                    msg = b"DBG-DELTAS-SEARCH: DISCARDED (prev size)\n"
                    self._write_debug(msg)
                return None
        header, data = revlog.compress(delta)
        deltalen = len(header) + len(data)
        offset = revlog.end(len(revlog) - 1)
        dist = deltalen + offset - revlog.start(chainbase)
        chainlen, compresseddeltalen = revlog._chaininfo(base)
        chainlen += 1
        compresseddeltalen += deltalen

        return _deltainfo(
            dist,
            deltalen,
            (header, data),
            deltabase,
            chainbase,
            chainlen,
            compresseddeltalen,
            snapshotdepth,
        )

    def _fullsnapshotinfo(self, fh, revinfo, curr):
        """Build a _deltainfo describing this revision stored as a full
        snapshot (no delta base)."""
        rawtext = self.buildtext(revinfo, fh)
        data = self.revlog.compress(rawtext)
        compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
        # a full snapshot is its own base and starts a fresh chain
        deltabase = chainbase = curr
        snapshotdepth = 0
        chainlen = 1

        return _deltainfo(
            dist,
            deltalen,
            data,
            deltabase,
            chainbase,
            chainlen,
            compresseddeltalen,
            snapshotdepth,
        )

    def finddeltainfo(self, revinfo, fh, excluded_bases=None, target_rev=None):
        """Find an acceptable delta against a candidate revision

        revinfo: information about the revision (instance of _revisioninfo)
        fh: file handle to either the .i or the .d revlog file,
            depending on whether it is inlined or not

        Returns the first acceptable candidate revision, as ordered by
        _candidategroups

        If no suitable deltabase is found, we return delta info for a full
        snapshot.

        `excluded_bases` is an optional set of revision that cannot be used as
        a delta base. Use this to recompute delta suitable in censor or strip
        context.
        """
        if target_rev is None:
            target_rev = len(self.revlog)

        if not revinfo.textlen:
            # empty revision: storing a full (empty) snapshot is trivially best
            return self._fullsnapshotinfo(fh, revinfo, target_rev)

        if excluded_bases is None:
            excluded_bases = set()

        # no delta for flag processor revision (see "candelta" for why)
        # not calling candelta since only one revision needs test, also to
        # avoid overhead fetching flags again.
        if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
            return self._fullsnapshotinfo(fh, revinfo, target_rev)

        debug_search = self._write_debug is not None and self._debug_search
        gather_debug = self._gather_debug

        if gather_debug:
            start = util.timer()

        # count the number of different delta we tried (for debug purpose)
        dbg_try_count = 0
        # count the number of "search round" we did. (for debug purpose)
        dbg_try_rounds = 0
        dbg_type = b'unknown'

        cachedelta = revinfo.cachedelta
        p1 = revinfo.p1
        p2 = revinfo.p2
        revlog = self.revlog

        deltainfo = None
        p1r, p2r = revlog.rev(p1), revlog.rev(p2)

        if gather_debug:
            # record parent chain lengths for the final debug report
            if p1r != nullrev:
                p1_chain_len = revlog._chaininfo(p1r)[0]
            else:
                p1_chain_len = -1
            if p2r != nullrev:
                p2_chain_len = revlog._chaininfo(p2r)[0]
            else:
                p2_chain_len = -1
        if debug_search:
            msg = b"DBG-DELTAS-SEARCH: SEARCH rev=%d\n"
            msg %= target_rev
            self._write_debug(msg)

        # generator of candidate-base groups; we drive it with send() to
        # let it refine around the best base found so far
        groups = _candidategroups(
            self.revlog,
            revinfo.textlen,
            p1r,
            p2r,
            cachedelta,
            excluded_bases,
            target_rev,
            snapshot_cache=self._snapshot_cache,
        )
        candidaterevs = next(groups)
        while candidaterevs is not None:
            dbg_try_rounds += 1
            if debug_search:
                # classify this search round for the debug output only
                prev = None
                if deltainfo is not None:
                    prev = deltainfo.base

                if (
                    cachedelta is not None
                    and len(candidaterevs) == 1
                    and cachedelta[0] in candidaterevs
                ):
                    round_type = b"cached-delta"
                elif p1 in candidaterevs or p2 in candidaterevs:
                    round_type = b"parents"
                elif prev is not None and all(c < prev for c in candidaterevs):
                    round_type = b"refine-down"
                elif prev is not None and all(c > prev for c in candidaterevs):
                    round_type = b"refine-up"
                else:
                    round_type = b"search-down"
                msg = b"DBG-DELTAS-SEARCH: ROUND #%d - %d candidates - %s\n"
                msg %= (dbg_try_rounds, len(candidaterevs), round_type)
                self._write_debug(msg)
            nominateddeltas = []
            if deltainfo is not None:
                if debug_search:
                    msg = (
                        b"DBG-DELTAS-SEARCH: CONTENDER: rev=%d - length=%d\n"
                    )
                    msg %= (deltainfo.base, deltainfo.deltalen)
                    self._write_debug(msg)
                # if we already found a good delta,
                # challenge it against refined candidates
                nominateddeltas.append(deltainfo)
            for candidaterev in candidaterevs:
                if debug_search:
                    msg = b"DBG-DELTAS-SEARCH: CANDIDATE: rev=%d\n"
                    msg %= candidaterev
                    self._write_debug(msg)
                    candidate_type = None
                    if candidaterev == p1:
                        candidate_type = b"p1"
                    elif candidaterev == p2:
                        candidate_type = b"p2"
                    elif self.revlog.issnapshot(candidaterev):
                        candidate_type = b"snapshot-%d"
                        candidate_type %= self.revlog.snapshotdepth(
                            candidaterev
                        )

                    if candidate_type is not None:
                        msg = b"DBG-DELTAS-SEARCH: type=%s\n"
                        msg %= candidate_type
                        self._write_debug(msg)
                    msg = b"DBG-DELTAS-SEARCH: size=%d\n"
                    msg %= self.revlog.length(candidaterev)
                    self._write_debug(msg)
                    msg = b"DBG-DELTAS-SEARCH: base=%d\n"
                    msg %= self.revlog.deltaparent(candidaterev)
                    self._write_debug(msg)

                dbg_try_count += 1

                if debug_search:
                    delta_start = util.timer()
                candidatedelta = self._builddeltainfo(
                    revinfo,
                    candidaterev,
                    fh,
                    target_rev=target_rev,
                )
                if debug_search:
                    delta_end = util.timer()
                    msg = b"DBG-DELTAS-SEARCH: delta-search-time=%f\n"
                    msg %= delta_end - delta_start
                    self._write_debug(msg)
                if candidatedelta is not None:
                    if is_good_delta_info(self.revlog, candidatedelta, revinfo):
                        if debug_search:
                            msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (GOOD)\n"
                            msg %= candidatedelta.deltalen
                            self._write_debug(msg)
                        nominateddeltas.append(candidatedelta)
                    elif debug_search:
                        msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (BAD)\n"
                        msg %= candidatedelta.deltalen
                        self._write_debug(msg)
                elif debug_search:
                    msg = b"DBG-DELTAS-SEARCH: NO-DELTA\n"
                    self._write_debug(msg)
            if nominateddeltas:
                # keep the smallest delta among current best and new tries
                deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
            if deltainfo is not None:
                candidaterevs = groups.send(deltainfo.base)
            else:
                candidaterevs = next(groups)

        if deltainfo is None:
            dbg_type = b"full"
            deltainfo = self._fullsnapshotinfo(fh, revinfo, target_rev)
        elif deltainfo.snapshotdepth:  # pytype: disable=attribute-error
            dbg_type = b"snapshot"
        else:
            dbg_type = b"delta"

        if gather_debug:
            end = util.timer()
            # did the final choice come straight from the cached delta?
            if dbg_type == b'full':
                used_cached = (
                    cachedelta is not None
                    and dbg_try_rounds == 0
                    and dbg_try_count == 0
                    and cachedelta[0] == nullrev
                )
            else:
                used_cached = (
                    cachedelta is not None
                    and dbg_try_rounds == 1
                    and dbg_try_count == 1
                    and deltainfo.base == cachedelta[0]
                )
            dbg = {
                'duration': end - start,
                'revision': target_rev,
                'delta-base': deltainfo.base,  # pytype: disable=attribute-error
                'search_round_count': dbg_try_rounds,
                'using-cached-base': used_cached,
                'delta_try_count': dbg_try_count,
                'type': dbg_type,
                'p1-chain-len': p1_chain_len,
                'p2-chain-len': p2_chain_len,
            }
            if (
                deltainfo.snapshotdepth  # pytype: disable=attribute-error
                is not None
            ):
                dbg[
                    'snapshot-depth'
                ] = deltainfo.snapshotdepth  # pytype: disable=attribute-error
            else:
                dbg['snapshot-depth'] = 0
            # describe which revlog this delta belongs to in the report
            target_revlog = b"UNKNOWN"
            target_type = self.revlog.target[0]
            target_key = self.revlog.target[1]
            if target_type == KIND_CHANGELOG:
                target_revlog = b'CHANGELOG:'
            elif target_type == KIND_MANIFESTLOG:
                target_revlog = b'MANIFESTLOG:'
                if target_key:
                    target_revlog += b'%s:' % target_key
            elif target_type == KIND_FILELOG:
                target_revlog = b'FILELOG:'
                if target_key:
                    target_revlog += b'%s:' % target_key
            dbg['target-revlog'] = target_revlog

            if self._debug_info is not None:
                self._debug_info.append(dbg)

            if self._write_debug is not None:
                msg = (
                    b"DBG-DELTAS:"
                    b" %-12s"
                    b" rev=%d:"
                    b" delta-base=%d"
                    b" is-cached=%d"
                    b" - search-rounds=%d"
                    b" try-count=%d"
                    b" - delta-type=%-6s"
                    b" snap-depth=%d"
                    b" - p1-chain-length=%d"
                    b" p2-chain-length=%d"
                    b" - duration=%f"
                    b"\n"
                )
                msg %= (
                    dbg["target-revlog"],
                    dbg["revision"],
                    dbg["delta-base"],
                    dbg["using-cached-base"],
                    dbg["search_round_count"],
                    dbg["delta_try_count"],
                    dbg["type"],
                    dbg["snapshot-depth"],
                    dbg["p1-chain-len"],
                    dbg["p2-chain-len"],
                    dbg["duration"],
                )
                self._write_debug(msg)
        return deltainfo
1506
1508
1507
1509
def delta_compression(default_compression_header, deltainfo):
    """return (COMPRESSION_MODE, deltainfo)

    used by revlog v2+ format to dispatch between PLAIN and DEFAULT
    compression.
    """
    header, payload = deltainfo.data
    if not header and not payload:
        # no data to store at all... declare it uncompressed
        return COMP_MODE_PLAIN, deltainfo
    if not header:
        # no compression header: inspect the payload's first byte
        first_byte = payload[0:1]
        if first_byte == b'\0':
            return COMP_MODE_PLAIN, deltainfo
        if first_byte == default_compression_header:
            return COMP_MODE_DEFAULT, deltainfo
        return COMP_MODE_INLINE, deltainfo
    if header == b'u':
        # we have a more efficient way to declare uncompressed
        return COMP_MODE_PLAIN, drop_u_compression(deltainfo)
    return COMP_MODE_INLINE, deltainfo
General Comments 0
You need to be logged in to leave comments. Login now