##// END OF EJS Templates
delta-find: make sure we only use newer full snapshot as candidate...
marmoute -
r50575:40e24d82 default
parent child Browse files
Show More
@@ -1,1471 +1,1472 b''
1 # revlogdeltas.py - Logic around delta computation for revlog
1 # revlogdeltas.py - Logic around delta computation for revlog
2 #
2 #
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 # Copyright 2018 Octobus <contact@octobus.net>
4 # Copyright 2018 Octobus <contact@octobus.net>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8 """Helper class to compute deltas stored inside revlogs"""
8 """Helper class to compute deltas stored inside revlogs"""
9
9
10
10
11 import collections
11 import collections
12 import struct
12 import struct
13
13
14 # import stuff from node for others to import from revlog
14 # import stuff from node for others to import from revlog
15 from ..node import nullrev
15 from ..node import nullrev
16 from ..i18n import _
16 from ..i18n import _
17 from ..pycompat import getattr
17 from ..pycompat import getattr
18
18
19 from .constants import (
19 from .constants import (
20 COMP_MODE_DEFAULT,
20 COMP_MODE_DEFAULT,
21 COMP_MODE_INLINE,
21 COMP_MODE_INLINE,
22 COMP_MODE_PLAIN,
22 COMP_MODE_PLAIN,
23 DELTA_BASE_REUSE_NO,
23 DELTA_BASE_REUSE_NO,
24 KIND_CHANGELOG,
24 KIND_CHANGELOG,
25 KIND_FILELOG,
25 KIND_FILELOG,
26 KIND_MANIFESTLOG,
26 KIND_MANIFESTLOG,
27 REVIDX_ISCENSORED,
27 REVIDX_ISCENSORED,
28 REVIDX_RAWTEXT_CHANGING_FLAGS,
28 REVIDX_RAWTEXT_CHANGING_FLAGS,
29 )
29 )
30
30
31 from ..thirdparty import attr
31 from ..thirdparty import attr
32
32
33 from .. import (
33 from .. import (
34 error,
34 error,
35 mdiff,
35 mdiff,
36 util,
36 util,
37 )
37 )
38
38
39 from . import flagutil
39 from . import flagutil
40
40
41 # maximum <delta-chain-data>/<revision-text-length> ratio
41 # maximum <delta-chain-data>/<revision-text-length> ratio
42 LIMIT_DELTA2TEXT = 2
42 LIMIT_DELTA2TEXT = 2
43
43
44
44
45 class _testrevlog:
45 class _testrevlog:
46 """minimalist fake revlog to use in doctests"""
46 """minimalist fake revlog to use in doctests"""
47
47
48 def __init__(self, data, density=0.5, mingap=0, snapshot=()):
48 def __init__(self, data, density=0.5, mingap=0, snapshot=()):
49 """data is an list of revision payload boundaries"""
49 """data is an list of revision payload boundaries"""
50 self._data = data
50 self._data = data
51 self._srdensitythreshold = density
51 self._srdensitythreshold = density
52 self._srmingapsize = mingap
52 self._srmingapsize = mingap
53 self._snapshot = set(snapshot)
53 self._snapshot = set(snapshot)
54 self.index = None
54 self.index = None
55
55
56 def start(self, rev):
56 def start(self, rev):
57 if rev == nullrev:
57 if rev == nullrev:
58 return 0
58 return 0
59 if rev == 0:
59 if rev == 0:
60 return 0
60 return 0
61 return self._data[rev - 1]
61 return self._data[rev - 1]
62
62
63 def end(self, rev):
63 def end(self, rev):
64 if rev == nullrev:
64 if rev == nullrev:
65 return 0
65 return 0
66 return self._data[rev]
66 return self._data[rev]
67
67
68 def length(self, rev):
68 def length(self, rev):
69 return self.end(rev) - self.start(rev)
69 return self.end(rev) - self.start(rev)
70
70
71 def __len__(self):
71 def __len__(self):
72 return len(self._data)
72 return len(self._data)
73
73
74 def issnapshot(self, rev):
74 def issnapshot(self, rev):
75 if rev == nullrev:
75 if rev == nullrev:
76 return True
76 return True
77 return rev in self._snapshot
77 return rev in self._snapshot
78
78
79
79
def slicechunk(revlog, revs, targetsize=None):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `revlog._srdensitythreshold`. No gap smaller than
    `revlog._srmingapsize` is skipped.

    If `targetsize` is set, no chunk larger than `targetsize` will be yield.
    For consistency with other slicing choice, this limit won't go lower than
    `revlog._srmingapsize`.

    If individual revisions chunk are larger than this limit, they will still
    be raised individually.

    >>> data = [
    ...     5,  #00 (5)
    ...     10, #01 (5)
    ...     12, #02 (2)
    ...     12, #03 (empty)
    ...     27, #04 (15)
    ...     31, #05 (4)
    ...     31, #06 (empty)
    ...     42, #07 (11)
    ...     47, #08 (5)
    ...     47, #09 (empty)
    ...     48, #10 (1)
    ...     51, #11 (3)
    ...     74, #12 (23)
    ...     85, #13 (11)
    ...     86, #14 (1)
    ...     91, #15 (5)
    ... ]
    >>> revlog = _testrevlog(data, snapshot=range(16))

    >>> list(slicechunk(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(slicechunk(revlog, [0, 15]))
    [[0], [15]]
    >>> list(slicechunk(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]

    Slicing with a maximum chunk size
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
    [[0], [11], [13], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
    [[0], [11], [13, 15]]

    Slicing involving nullrev
    >>> list(slicechunk(revlog, [-1, 0, 11, 13, 15], targetsize=20))
    [[-1, 0], [11], [13, 15]]
    >>> list(slicechunk(revlog, [-1, 13, 15], targetsize=5))
    [[-1], [13], [15]]
    """
    if targetsize is not None:
        targetsize = max(targetsize, revlog._srmingapsize)
    # targetsize should not be specified when evaluating delta candidates:
    # * targetsize is used to ensure we stay within specification when reading,
    densityslicing = getattr(revlog.index, 'slicechunktodensity', None)
    if densityslicing is None:
        # no C implementation available on the index: fall back to the pure
        # Python density slicing defined in this module
        def densityslicing(x, y, z):
            return _slicechunktodensity(revlog, x, y, z)

    dense_chunks = densityslicing(
        revs, revlog._srdensitythreshold, revlog._srmingapsize
    )
    for chunk in dense_chunks:
        # further split each density-approved chunk to honor `targetsize`
        yield from _slicechunktosize(revlog, chunk, targetsize)
152
152
153
153
def _slicechunktosize(revlog, revs, targetsize=None):
    """slice revs to match the target size

    This is intended to be used on chunk that density slicing selected by that
    are still too large compared to the read garantee of revlog. This might
    happens when "minimal gap size" interrupted the slicing or when chain are
    built in a way that create large blocks next to each other.

    >>> data = [
    ...     3,  #0 (3)
    ...     5,  #1 (2)
    ...     6,  #2 (1)
    ...     8,  #3 (2)
    ...     8,  #4 (empty)
    ...     11, #5 (3)
    ...     12, #6 (1)
    ...     13, #7 (1)
    ...     14, #8 (1)
    ... ]

    == All snapshots cases ==
    >>> revlog = _testrevlog(data, snapshot=range(9))

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1, 2], [3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]

    == No Snapshot cases ==
    >>> revlog = _testrevlog(data)

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1], [2, 3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [4, 5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]

    == mixed case ==
    >>> revlog = _testrevlog(data, snapshot=[0, 1, 2])
    >>> list(_slicechunktosize(revlog, list(range(9)), 5))
    [[0, 1], [2], [3, 4, 5], [6, 7, 8]]
    """
    assert targetsize is None or 0 <= targetsize
    startdata = revlog.start(revs[0])
    enddata = revlog.end(revs[-1])
    fullspan = enddata - startdata
    # whole span already fits within the limit: nothing to slice
    if targetsize is None or fullspan <= targetsize:
        yield revs
        return

    startrevidx = 0
    endrevidx = 1
    iterrevs = enumerate(revs)
    next(iterrevs)  # skip first rev.
    # first step: get snapshots out of the way
    for idx, r in iterrevs:
        span = revlog.end(r) - startdata
        snapshot = revlog.issnapshot(r)
        if span <= targetsize and snapshot:
            # the snapshot still fits in the current chunk: extend the chunk
            endrevidx = idx + 1
        else:
            # flush the current chunk and start a new one at this revision
            chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
            if chunk:
                yield chunk
            startrevidx = idx
            startdata = revlog.start(r)
            endrevidx = idx + 1
        if not snapshot:
            # the remaining (non-snapshot) revisions are handled by the
            # binary slicing below
            break

    # for the others, we use binary slicing to quickly converge toward valid
    # chunks (otherwise, we might end up looking for start/end of many
    # revisions). This logic is not looking for the perfect slicing point, it
    # focuses on quickly converging toward valid chunks.
    nbitem = len(revs)
    while (enddata - startdata) > targetsize:
        endrevidx = nbitem
        if nbitem - startrevidx <= 1:
            break  # protect against individual chunk larger than limit
        localenddata = revlog.end(revs[endrevidx - 1])
        span = localenddata - startdata
        while span > targetsize:
            if endrevidx - startrevidx <= 1:
                break  # protect against individual chunk larger than limit
            # halve the candidate range until the span fits the target
            endrevidx -= (endrevidx - startrevidx) // 2
            localenddata = revlog.end(revs[endrevidx - 1])
            span = localenddata - startdata
        chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
        if chunk:
            yield chunk
        startrevidx = endrevidx
        startdata = revlog.start(revs[startrevidx])

    # emit whatever remains past the last cut point
    chunk = _trimchunk(revlog, revs, startrevidx)
    if chunk:
        yield chunk
307
307
308
308
def _slicechunktodensity(revlog, revs, targetdensity=0.5, mingapsize=0):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
    skipped.

    >>> revlog = _testrevlog([
    ...     5,  #00 (5)
    ...     10, #01 (5)
    ...     12, #02 (2)
    ...     12, #03 (empty)
    ...     27, #04 (15)
    ...     31, #05 (4)
    ...     31, #06 (empty)
    ...     42, #07 (11)
    ...     47, #08 (5)
    ...     47, #09 (empty)
    ...     48, #10 (1)
    ...     51, #11 (3)
    ...     74, #12 (23)
    ...     85, #13 (11)
    ...     86, #14 (1)
    ...     91, #15 (5)
    ... ])

    >>> list(_slicechunktodensity(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(_slicechunktodensity(revlog, [0, 15]))
    [[0], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           mingapsize=20))
    [[1, 2, 3, 5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95))
    [[1, 2], [5], [8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95, mingapsize=12))
    [[1, 2], [5, 8, 10, 11], [14]]
    """
    start = revlog.start
    length = revlog.length

    # a single revision (or nothing) is trivially dense enough
    if len(revs) <= 1:
        yield revs
        return

    deltachainspan = segmentspan(revlog, revs)

    # a span smaller than the minimal gap size cannot contain a gap worth
    # splitting on
    if deltachainspan < mingapsize:
        yield revs
        return

    readdata = deltachainspan
    chainpayload = sum(length(r) for r in revs)

    if deltachainspan:
        density = chainpayload / float(deltachainspan)
    else:
        density = 1.0

    if density >= targetdensity:
        yield revs
        return

    # Collect all gaps between consecutive non-empty revisions; the list is
    # sorted below so that the largest gap can be popped off the end first.
    gaps = []
    prevend = None
    for i, rev in enumerate(revs):
        revstart = start(rev)
        revlen = length(rev)

        # Skip empty revisions to form larger holes
        if revlen == 0:
            continue

        if prevend is not None:
            gapsize = revstart - prevend
            # only consider holes that are large enough
            if gapsize > mingapsize:
                gaps.append((gapsize, i))

        prevend = revstart + revlen
    # sort the gaps to pop them from largest to small
    gaps.sort()

    # Collect the indices of the largest holes until the density is acceptable
    selected = []
    while gaps and density < targetdensity:
        gapsize, gapidx = gaps.pop()

        selected.append(gapidx)

        # skipping this gap reduces the amount of data that must be read,
        # which raises the effective density
        readdata -= gapsize
        if readdata > 0:
            density = chainpayload / float(readdata)
        else:
            density = 1.0
    selected.sort()

    # Cut the revs at collected indices
    previdx = 0
    for idx in selected:

        chunk = _trimchunk(revlog, revs, previdx, idx)
        if chunk:
            yield chunk

        previdx = idx

    # emit the final chunk after the last cut point
    chunk = _trimchunk(revlog, revs, previdx)
    if chunk:
        yield chunk
433
433
434
434
435 def _trimchunk(revlog, revs, startidx, endidx=None):
435 def _trimchunk(revlog, revs, startidx, endidx=None):
436 """returns revs[startidx:endidx] without empty trailing revs
436 """returns revs[startidx:endidx] without empty trailing revs
437
437
438 Doctest Setup
438 Doctest Setup
439 >>> revlog = _testrevlog([
439 >>> revlog = _testrevlog([
440 ... 5, #0
440 ... 5, #0
441 ... 10, #1
441 ... 10, #1
442 ... 12, #2
442 ... 12, #2
443 ... 12, #3 (empty)
443 ... 12, #3 (empty)
444 ... 17, #4
444 ... 17, #4
445 ... 21, #5
445 ... 21, #5
446 ... 21, #6 (empty)
446 ... 21, #6 (empty)
447 ... ])
447 ... ])
448
448
449 Contiguous cases:
449 Contiguous cases:
450 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
450 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
451 [0, 1, 2, 3, 4, 5]
451 [0, 1, 2, 3, 4, 5]
452 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
452 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
453 [0, 1, 2, 3, 4]
453 [0, 1, 2, 3, 4]
454 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
454 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
455 [0, 1, 2]
455 [0, 1, 2]
456 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
456 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
457 [2]
457 [2]
458 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
458 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
459 [3, 4, 5]
459 [3, 4, 5]
460 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
460 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
461 [3, 4]
461 [3, 4]
462
462
463 Discontiguous cases:
463 Discontiguous cases:
464 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
464 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
465 [1, 3, 5]
465 [1, 3, 5]
466 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
466 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
467 [1]
467 [1]
468 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
468 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
469 [3, 5]
469 [3, 5]
470 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
470 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
471 [3, 5]
471 [3, 5]
472 """
472 """
473 length = revlog.length
473 length = revlog.length
474
474
475 if endidx is None:
475 if endidx is None:
476 endidx = len(revs)
476 endidx = len(revs)
477
477
478 # If we have a non-emtpy delta candidate, there are nothing to trim
478 # If we have a non-emtpy delta candidate, there are nothing to trim
479 if revs[endidx - 1] < len(revlog):
479 if revs[endidx - 1] < len(revlog):
480 # Trim empty revs at the end, except the very first revision of a chain
480 # Trim empty revs at the end, except the very first revision of a chain
481 while (
481 while (
482 endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0
482 endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0
483 ):
483 ):
484 endidx -= 1
484 endidx -= 1
485
485
486 return revs[startidx:endidx]
486 return revs[startidx:endidx]
487
487
488
488
def segmentspan(revlog, revs):
    """Get the byte span of a segment of revisions

    revs is a sorted array of revision numbers

    >>> revlog = _testrevlog([
    ...     5,  #0
    ...     10, #1
    ...     12, #2
    ...     12, #3 (empty)
    ...     17, #4
    ... ])

    >>> segmentspan(revlog, [0, 1, 2, 3, 4])
    17
    >>> segmentspan(revlog, [0, 4])
    17
    >>> segmentspan(revlog, [3, 4])
    5
    >>> segmentspan(revlog, [1, 2, 3,])
    7
    >>> segmentspan(revlog, [1, 3])
    7
    """
    # an empty segment spans no bytes at all
    if not revs:
        return 0
    first, last = revs[0], revs[-1]
    return revlog.end(last) - revlog.start(first)
517
517
518
518
def _textfromdelta(fh, revlog, baserev, delta, p1, p2, flags, expectednode):
    """build full text from a (base, delta) pair and other metadata

    `fh` is passed through to ``revlog.revision`` as its ``_df`` file handle;
    `p1`/`p2` are the parent nodes used for hash validation, `flags` the
    revision index flags, and `expectednode` the node the rebuilt text must
    hash to. Returns the full text; re-raises CensoredNodeError unless the
    censored flag is set.
    """
    # special case deltas which replace entire base; no need to decode
    # base revision. this neatly avoids censored bases, which throw when
    # they're decoded.
    hlen = struct.calcsize(b">lll")
    if delta[:hlen] == mdiff.replacediffheader(
        revlog.rawsize(baserev), len(delta) - hlen
    ):
        # the delta is a full-replacement diff: its payload IS the full text
        fulltext = delta[hlen:]
    else:
        # deltabase is rawtext before changed by flag processors, which is
        # equivalent to non-raw text
        basetext = revlog.revision(baserev, _df=fh)
        fulltext = mdiff.patch(basetext, delta)

    try:
        validatehash = flagutil.processflagsraw(revlog, fulltext, flags)
        if validatehash:
            revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
        if flags & REVIDX_ISCENSORED:
            # the caller claimed this revision is censored, but hashing
            # succeeded: the claim is wrong
            raise error.StorageError(
                _(b'node %s is not censored') % expectednode
            )
    except error.CensoredNodeError:
        # must pass the censored index flag to add censored revisions
        if not flags & REVIDX_ISCENSORED:
            raise
    return fulltext
548
548
549
549
@attr.s(slots=True, frozen=True)
class _deltainfo:
    """Immutable description of a candidate delta for a revision."""

    # distance from the base revision; bounds the amount of I/O needed
    distance = attr.ib()
    # size in bytes of the stored delta (drop_u_compression subtracts the
    # one-byte b'u' header from it)
    deltalen = attr.ib()
    # 2-tuple (header, compressed payload) holding the delta data
    data = attr.ib()
    # revision number used as delta base
    base = attr.ib()
    # first revision of the delta chain
    chainbase = attr.ib()
    # number of deltas in the chain
    chainlen = attr.ib()
    # cumulated size of the deltas that must be applied; bounds CPU usage
    compresseddeltalen = attr.ib()
    # snapshot depth, or None when the revision is not stored as a snapshot
    snapshotdepth = attr.ib()
560
560
561
561
def drop_u_compression(delta):
    """Strip the explicit b'u' (no-compression) header from a delta.

    Returns a new ``_deltainfo`` whose payload carries no compression
    header at all. This is useful for revlog formats that have a better
    compression method and do not need the marker byte.
    """
    assert delta.data[0] == b'u', delta.data[0]
    payload = delta.data[1]
    return _deltainfo(
        distance=delta.distance,
        deltalen=delta.deltalen - 1,
        data=(b'', payload),
        base=delta.base,
        chainbase=delta.chainbase,
        chainlen=delta.chainlen,
        compresseddeltalen=delta.compresseddeltalen,
        snapshotdepth=delta.snapshotdepth,
    )
578
578
579
579
def is_good_delta_info(revlog, deltainfo, revinfo):
    """Tell whether ``deltainfo`` is acceptable for storing ``revinfo``.

    "Good" means the delta stays within the disk-span, size, and
    chain-length bounds that we know keep revlogs performant.

    Two quantities drive most checks:
    - ``deltainfo.distance``: distance from the base revision; bounding it
      limits the amount of I/O needed to restore the text.
    - ``deltainfo.compresseddeltalen``: total size of the deltas to apply;
      bounding it limits the CPU we consume.
    """
    if deltainfo is None:
        return False

    textlen = revinfo.textlen

    # Maximum span of data we accept reading; when the configuration knob
    # is unset, use the delta's own distance so that check always passes.
    maxdist = revlog._maxdeltachainspan
    if not maxdist:
        maxdist = deltainfo.distance
    maxdist = max(maxdist, textlen * 4)

    # Bad delta from read span: the span of data read is larger than the
    # maximum allowed.
    #
    # In the sparse-revlog case, the associated "sparse reading" avoids
    # issues related to the span of data. In theory a pathological revlog
    # could be built where the delta pattern leads to too many reads, but
    # that does not happen in practice, so the span check is skipped
    # entirely there.
    if not revlog._sparserevlog and maxdist < deltainfo.distance:
        return False

    # Bad delta from new delta size: a delta larger than the target text
    # is an inefficient way to store it.
    if textlen < deltainfo.deltalen:
        return False

    # Bad delta from cumulated payload size: the sum of deltas grew larger
    # than K * target text length.
    if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
        return False

    # Bad delta from chain length: too many deltas in the chain.
    if revlog._maxchainlen and revlog._maxchainlen < deltainfo.chainlen:
        return False

    # Bad delta from intermediate snapshot size limit: the limit exists to
    # prevent endless chains of intermediate deltas from being created.
    depth = deltainfo.snapshotdepth
    if depth is not None and (textlen >> depth) < deltainfo.deltalen:
        return False

    # Bad delta if the new intermediate snapshot would be larger than the
    # previous snapshot it builds upon.
    if depth and revlog.length(deltainfo.base) < deltainfo.deltalen:
        return False

    return True
651
651
652
652
# If a revision's full text is that much bigger than a base candidate full
# text's, it is very unlikely that it will produce a valid delta. We no longer
# consider these candidates.
LIMIT_BASE2TEXT = 500
657
657
658
658
def _candidategroups(
    revlog,
    textlen,
    p1,
    p2,
    cachedelta,
    excluded_bases=None,
    target_rev=None,
):
    """Provides group of revision to be tested as delta base

    This top level function focus on emitting groups with unique and worthwhile
    content. See _raw_candidate_groups for details about the group order.

    This is a generator driven through ``send()``: each yielded tuple is a
    group of candidate base revisions, and the value sent back is the best
    revision found so far (or None), which steers refinement. A final
    ``None`` is yielded when no more candidates remain.
    """
    # should we try to build a delta?
    if not (len(revlog) and revlog._storedeltachains):
        yield None
        return

    deltalength = revlog.length
    deltaparent = revlog.deltaparent
    sparse = revlog._sparserevlog
    good = None

    deltas_limit = textlen * LIMIT_DELTA2TEXT
    group_chunk_size = revlog._candidate_group_chunk_size

    # nullrev is handled as a last resort, never as a group member
    tested = {nullrev}
    candidates = _refinedgroups(
        revlog,
        p1,
        p2,
        cachedelta,
    )
    while True:
        temptative = candidates.send(good)
        if temptative is None:
            break
        group = []
        for rev in temptative:
            # skip over empty delta (no need to include them in a chain)
            while revlog._generaldelta and not (
                rev == nullrev or rev in tested or deltalength(rev)
            ):
                tested.add(rev)
                rev = deltaparent(rev)
            # no need to try a delta against nullrev, this will be done as a
            # last resort.
            if rev == nullrev:
                continue
            # filter out revision we tested already
            if rev in tested:
                continue
            # a higher authority deemed the base unworthy (e.g. censored)
            if excluded_bases is not None and rev in excluded_bases:
                tested.add(rev)
                continue
            # We are in some recomputation cases and that rev is too high
            # in the revlog
            if target_rev is not None and rev >= target_rev:
                tested.add(rev)
                continue
            # filter out delta base that will never produce good delta
            if deltas_limit < revlog.length(rev):
                tested.add(rev)
                continue
            # a base much smaller than the text is unlikely to delta well
            if sparse and revlog.rawsize(rev) < (textlen // LIMIT_BASE2TEXT):
                tested.add(rev)
                continue
            # no delta for rawtext-changing revs (see "candelta" for why)
            if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
                tested.add(rev)
                continue

            # If we reach here, we are about to build and test a delta.
            # The delta building process will compute the chaininfo in all
            # case, since that computation is cached, it is fine to access it
            # here too.
            chainlen, chainsize = revlog._chaininfo(rev)
            # if chain will be too long, skip base
            if revlog._maxchainlen and chainlen >= revlog._maxchainlen:
                tested.add(rev)
                continue
            # if chain already have too much data, skip base
            if deltas_limit < chainsize:
                tested.add(rev)
                continue
            if sparse and revlog.upperboundcomp is not None:
                maxcomp = revlog.upperboundcomp
                basenotsnap = (p1, p2, nullrev)
                if rev not in basenotsnap and revlog.issnapshot(rev):
                    snapshotdepth = revlog.snapshotdepth(rev)
                    # If text is significantly larger than the base, we can
                    # expect the resulting delta to be proportional to the size
                    # difference
                    revsize = revlog.rawsize(rev)
                    rawsizedistance = max(textlen - revsize, 0)
                    # use an estimate of the compression upper bound.
                    lowestrealisticdeltalen = rawsizedistance // maxcomp

                    # check the absolute constraint on the delta size
                    snapshotlimit = textlen >> snapshotdepth
                    if snapshotlimit < lowestrealisticdeltalen:
                        # delta lower bound is larger than accepted upper bound
                        tested.add(rev)
                        continue

                    # check the relative constraint on the delta size
                    revlength = revlog.length(rev)
                    if revlength < lowestrealisticdeltalen:
                        # delta probable lower bound is larger than target base
                        tested.add(rev)
                        continue

            group.append(rev)
        if group:
            # When the size of the candidate group is big, it can result in a
            # quite significant performance impact. To reduce this, we can send
            # them in smaller batches until the new batch does not provide any
            # improvements.
            #
            # This might reduce the overall efficiency of the compression in
            # some corner cases, but that should also prevent very pathological
            # cases from being an issue. (eg. 20 000 candidates).
            #
            # XXX note that the ordering of the group becomes important as it
            # now impacts the final result. The current order is unprocessed
            # and can be improved.
            if group_chunk_size == 0:
                tested.update(group)
                good = yield tuple(group)
            else:
                prev_good = good
                for start in range(0, len(group), group_chunk_size):
                    sub_group = group[start : start + group_chunk_size]
                    tested.update(sub_group)
                    good = yield tuple(sub_group)
                    # stop batching as soon as a batch brings no improvement
                    if prev_good == good:
                        break

    yield None
800
800
801
801
def _refinedgroups(revlog, p1, p2, cachedelta):
    """Yield candidate-base groups and refine them from caller feedback.

    Driven through ``send()``: the value sent back after each yield is the
    best base found so far (or None). In the sparse-revlog case a good
    snapshot base is further refined by walking its delta-parent chain
    ("down") and its snapshot children ("up"). Ends by yielding None.
    """
    good = None
    # First we try to reuse the delta contained in the bundle.
    # (or from the source revlog)
    #
    # This logic only applies to general delta repositories and can be disabled
    # through configuration. Disabling reuse source delta is useful when
    # we want to make sure we recomputed "optimal" deltas.
    debug_info = None
    if cachedelta is not None and cachedelta[2] > DELTA_BASE_REUSE_NO:
        # Assume what we received from the server is a good choice
        # build delta will reuse the cache
        if debug_info is not None:
            debug_info['cached-delta.tested'] += 1
        good = yield (cachedelta[0],)
        if good is not None:
            if debug_info is not None:
                debug_info['cached-delta.accepted'] += 1
            yield None
            return
    # XXX cache me higher
    snapshot_cache = SnapshotCache()
    groups = _rawgroups(
        revlog,
        p1,
        p2,
        cachedelta,
        snapshot_cache,
    )
    for candidates in groups:
        good = yield candidates
        if good is not None:
            break

    # If sparse revlog is enabled, we can try to refine the available deltas
    if not revlog._sparserevlog:
        yield None
        return

    # if we have a refinable value, try to refine it
    if good is not None and good not in (p1, p2) and revlog.issnapshot(good):
        # refine snapshot down: try the chosen snapshot's delta parents,
        # stopping when the answer no longer changes
        previous = None
        while previous != good:
            previous = good
            base = revlog.deltaparent(good)
            if base == nullrev:
                break
            good = yield (base,)
        # refine snapshot up: try snapshots built on top of the current best
        if not snapshot_cache.snapshots:
            snapshot_cache.update(revlog, good + 1)
        previous = None
        while good != previous:
            previous = good
            children = tuple(sorted(c for c in snapshot_cache.snapshots[good]))
            good = yield children

    if debug_info is not None:
        if good is None:
            debug_info['no-solution'] += 1

    yield None
865
865
866
866
def _rawgroups(revlog, p1, p2, cachedelta, snapshot_cache=None):
    """Provides group of revision to be tested as delta base

    This lower level function focuses on emitting deltas that are
    theoretically interesting, without looking at any practical details.

    The group order aims at providing fast or small candidates first.
    """
    gdelta = revlog._generaldelta
    # gate sparse behind general-delta because of issue6056
    sparse = gdelta and revlog._sparserevlog
    curr = len(revlog)
    prev = curr - 1
    deltachain = lambda rev: revlog._deltachain(rev)[0]

    if gdelta:
        # exclude already lazy tested base if any
        parents = [p for p in (p1, p2) if p != nullrev]

        if not revlog._deltabothparents and len(parents) == 2:
            parents.sort()
            # To minimize the chance of having to build a fulltext,
            # pick first whichever parent is closest to us (max rev)
            yield (parents[1],)
            # then the other one (min rev) if the first did not fit
            yield (parents[0],)
        elif len(parents) > 0:
            # Test all parents (1 or 2), and keep the best candidate
            yield parents

    if sparse and parents:
        if snapshot_cache is None:
            # map: base-rev: [snapshot-revs]
            snapshot_cache = SnapshotCache()
        # See if we can use an existing snapshot in the parent chains to use as
        # a base for a new intermediate-snapshot
        #
        # search for snapshot in parents delta chain
        # map: snapshot-level: snapshot-rev
        parents_snaps = collections.defaultdict(set)
        candidate_chains = [deltachain(p) for p in parents]
        for chain in candidate_chains:
            for idx, s in enumerate(chain):
                if not revlog.issnapshot(s):
                    break
                parents_snaps[idx].add(s)
        # only snapshots newer than every parent's level-0 snapshot matter
        snapfloor = min(parents_snaps[0]) + 1
        snapshot_cache.update(revlog, snapfloor)
        # search for the highest "unrelated" revision
        #
        # Adding snapshots used by "unrelated" revision increase the odd we
        # reuse an independant, yet better snapshot chain.
        #
        # XXX instead of building a set of revisions, we could lazily enumerate
        # over the chains. That would be more efficient, however we stick to
        # simple code for now.
        all_revs = set()
        for chain in candidate_chains:
            all_revs.update(chain)
        other = None
        for r in revlog.revs(prev, snapfloor):
            if r not in all_revs:
                other = r
                break
        if other is not None:
            # To avoid unfair competition, we won't use unrelated intermediate
            # snapshot that are deeper than the ones from the parent delta
            # chain.
            max_depth = max(parents_snaps.keys())
            chain = deltachain(other)
            for depth, s in enumerate(chain):
                if s < snapfloor:
                    continue
                if max_depth < depth:
                    break
                if not revlog.issnapshot(s):
                    break
                parents_snaps[depth].add(s)
        # Test them as possible intermediate snapshot base
        # We test them from highest to lowest level. High level one are more
        # likely to result in small delta
        floor = None
        for idx, snaps in sorted(parents_snaps.items(), reverse=True):
            siblings = set()
            for s in snaps:
                siblings.update(snapshot_cache.snapshots[s])
            # Before considering making a new intermediate snapshot, we check
            # if an existing snapshot, children of base we consider, would be
            # suitable.
            #
            # It give a change to reuse a delta chain "unrelated" to the
            # current revision instead of starting our own. Without such
            # re-use, topological branches would keep reopening new chains.
            # Creating more and more snapshot as the repository grow.

            if floor is not None:
                # We only do this for siblings created after the one in our
                # parent's delta chain. Those created before has less chances
                # to be valid base since our ancestors had to create a new
                # snapshot.
                siblings = [r for r in siblings if floor < r]
            yield tuple(sorted(siblings))
            # then test the base from our parent's delta chain.
            yield tuple(sorted(snaps))
            floor = min(snaps)
        # No suitable base found in the parent chain, search if any full
        # snapshots emitted since parent's base would be a suitable base for an
        # intermediate snapshot.
        #
        # It give a chance to reuse a delta chain unrelated to the current
        # revisions instead of starting our own. Without such re-use,
        # topological branches would keep reopening new full chains. Creating
        # more and more snapshot as the repository grow.
        #
        # only full snapshots newer than the snapshot floor are candidates
        full = [r for r in snapshot_cache.snapshots[nullrev] if snapfloor <= r]
        yield tuple(sorted(full))

    if not sparse:
        # other approach failed try against prev to hopefully save us a
        # fulltext.
        yield (prev,)
986
987
987
988
class SnapshotCache:
    """Cache mapping each delta base to the snapshots built on top of it.

    ``snapshots`` maps base-rev -> set of snapshot revisions using it as a
    delta parent (full snapshots are keyed under ``nullrev``).
    ``_start_rev``/``_end_rev`` record the contiguous revision range already
    scanned, so repeated ``update()`` calls only scan new revisions.
    """

    __slots__ = ('snapshots', '_start_rev', '_end_rev')

    def __init__(self):
        # base-rev -> {snapshot revisions with that delta parent}
        self.snapshots = collections.defaultdict(set)
        # inclusive bounds of the range already scanned (None = nothing yet)
        self._start_rev = None
        self._end_rev = None

    def update(self, revlog, start_rev=0):
        """find snapshots from start_rev to tip"""
        nb_revs = len(revlog)
        end_rev = nb_revs - 1
        if start_rev > end_rev:
            return  # range is empty

        if self._start_rev is None:
            assert self._end_rev is None
            self._update(revlog, start_rev, end_rev)
        elif not (self._start_rev <= start_rev and end_rev <= self._end_rev):
            # the requested range is not fully covered; scan only the
            # uncovered portions on either side of the cached range
            if start_rev < self._start_rev:
                self._update(revlog, start_rev, self._start_rev - 1)
            if self._end_rev < end_rev:
                self._update(revlog, self._end_rev + 1, end_rev)

        # _update only fills the cache content; the covered-range
        # bookkeeping is maintained here
        if self._start_rev is None:
            assert self._end_rev is None
            self._end_rev = end_rev
            self._start_rev = start_rev
        else:
            self._start_rev = min(self._start_rev, start_rev)
            self._end_rev = max(self._end_rev, end_rev)
        assert self._start_rev <= self._end_rev, (
            self._start_rev,
            self._end_rev,
        )

    def _update(self, revlog, start_rev, end_rev):
        """internal method that actually do update content"""
        # both bounds must lie entirely outside the already-covered range
        assert self._start_rev is None or (
            start_rev < self._start_rev or start_rev > self._end_rev
        ), (self._start_rev, self._end_rev, start_rev, end_rev)
        assert self._start_rev is None or (
            end_rev < self._start_rev or end_rev > self._end_rev
        ), (self._start_rev, self._end_rev, start_rev, end_rev)
        cache = self.snapshots
        # NOTE(review): attribute name is passed as bytes; this relies on
        # Mercurial's pycompat attribute-name handling — confirm against
        # util.safehasattr before changing.
        if util.safehasattr(revlog.index, b'findsnapshots'):
            # fast path: the C/Rust index can enumerate snapshots itself
            revlog.index.findsnapshots(cache, start_rev, end_rev)
        else:
            deltaparent = revlog.deltaparent
            issnapshot = revlog.issnapshot
            for rev in revlog.revs(start_rev, end_rev):
                if issnapshot(rev):
                    cache[deltaparent(rev)].add(rev)
1041
1042
1042
1043
class deltacomputer:
    """Compute deltas and pick a delta base for new revisions of a revlog."""

    def __init__(
        self,
        revlog,
        write_debug=None,
        debug_search=False,
        debug_info=None,
    ):
        # revlog we compute deltas for
        self.revlog = revlog
        # optional callable receiving debug byte-strings
        self._write_debug = write_debug
        # emit per-round search debug output (only if _write_debug is set)
        self._debug_search = debug_search
        # optional list accumulating per-revision debug dicts
        self._debug_info = debug_info

    def buildtext(self, revinfo, fh):
        """Builds a fulltext version of a revision

        revinfo: revisioninfo instance that contains all needed info
        fh: file handle to either the .i or the .d revlog file,
            depending on whether it is inlined or not
        """
        btext = revinfo.btext
        # ``btext`` is a one-element list used as a mutable cache slot
        if btext[0] is not None:
            return btext[0]

        revlog = self.revlog
        cachedelta = revinfo.cachedelta
        baserev = cachedelta[0]
        delta = cachedelta[1]

        fulltext = btext[0] = _textfromdelta(
            fh,
            revlog,
            baserev,
            delta,
            revinfo.p1,
            revinfo.p2,
            revinfo.flags,
            revinfo.node,
        )
        return fulltext

    def _builddeltadiff(self, base, revinfo, fh):
        """Return the raw delta of this revision against ``base``."""
        revlog = self.revlog
        t = self.buildtext(revinfo, fh)
        if revlog.iscensored(base):
            # deltas based on a censored revision must replace the
            # full content in one patch, so delta works everywhere
            header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
            delta = header + t
        else:
            ptext = revlog.rawdata(base, _df=fh)
            delta = mdiff.textdiff(ptext, t)

        return delta

    def _builddeltainfo(self, revinfo, base, fh):
        """Build a _deltainfo for a delta against ``base``.

        Returns None when an upper-bound estimate already shows the delta
        cannot be acceptable (too large for its snapshot level or larger
        than its base).
        """
        # can we use the cached delta?
        revlog = self.revlog
        debug_search = self._write_debug is not None and self._debug_search
        chainbase = revlog.chainbase(base)
        if revlog._generaldelta:
            deltabase = base
        else:
            # without general delta, the base is always the previous chain base
            deltabase = chainbase
        snapshotdepth = None
        if revlog._sparserevlog and deltabase == nullrev:
            snapshotdepth = 0
        elif revlog._sparserevlog and revlog.issnapshot(deltabase):
            # A delta chain should always be one full snapshot,
            # zero or more semi-snapshots, and zero or more deltas
            p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
            if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
                snapshotdepth = len(revlog._deltachain(deltabase)[0])
        delta = None
        if revinfo.cachedelta:
            cachebase = revinfo.cachedelta[0]
            # check if the diff still apply
            currentbase = cachebase
            # skip over empty revisions: they do not change the fulltext,
            # so a delta against them also applies to their delta parent
            while (
                currentbase != nullrev
                and currentbase != base
                and self.revlog.length(currentbase) == 0
            ):
                currentbase = self.revlog.deltaparent(currentbase)
            if self.revlog._lazydelta and currentbase == base:
                delta = revinfo.cachedelta[1]
        if delta is None:
            delta = self._builddeltadiff(base, revinfo, fh)
        if debug_search:
            msg = b"DBG-DELTAS-SEARCH: uncompressed-delta-size=%d\n"
            msg %= len(delta)
            self._write_debug(msg)
        # snapshotdept need to be neither None nor 0 level snapshot
        if revlog.upperboundcomp is not None and snapshotdepth:
            # best-case compressed size given the configured compression bound
            lowestrealisticdeltalen = len(delta) // revlog.upperboundcomp
            snapshotlimit = revinfo.textlen >> snapshotdepth
            if debug_search:
                msg = b"DBG-DELTAS-SEARCH: projected-lower-size=%d\n"
                msg %= lowestrealisticdeltalen
                self._write_debug(msg)
            if snapshotlimit < lowestrealisticdeltalen:
                if debug_search:
                    msg = b"DBG-DELTAS-SEARCH: DISCARDED (snapshot limit)\n"
                    self._write_debug(msg)
                return None
            if revlog.length(base) < lowestrealisticdeltalen:
                if debug_search:
                    msg = b"DBG-DELTAS-SEARCH: DISCARDED (prev size)\n"
                    self._write_debug(msg)
                return None
        header, data = revlog.compress(delta)
        deltalen = len(header) + len(data)
        offset = revlog.end(len(revlog) - 1)
        dist = deltalen + offset - revlog.start(chainbase)
        chainlen, compresseddeltalen = revlog._chaininfo(base)
        chainlen += 1
        compresseddeltalen += deltalen

        return _deltainfo(
            dist,
            deltalen,
            (header, data),
            deltabase,
            chainbase,
            chainlen,
            compresseddeltalen,
            snapshotdepth,
        )

    def _fullsnapshotinfo(self, fh, revinfo, curr):
        """Return a _deltainfo storing the full text as a depth-0 snapshot."""
        rawtext = self.buildtext(revinfo, fh)
        data = self.revlog.compress(rawtext)
        compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
        # a full snapshot is its own base and starts a new chain
        deltabase = chainbase = curr
        snapshotdepth = 0
        chainlen = 1

        return _deltainfo(
            dist,
            deltalen,
            data,
            deltabase,
            chainbase,
            chainlen,
            compresseddeltalen,
            snapshotdepth,
        )

    def finddeltainfo(self, revinfo, fh, excluded_bases=None, target_rev=None):
        """Find an acceptable delta against a candidate revision

        revinfo: information about the revision (instance of _revisioninfo)
        fh: file handle to either the .i or the .d revlog file,
            depending on whether it is inlined or not

        Returns the first acceptable candidate revision, as ordered by
        _candidategroups

        If no suitable deltabase is found, we return delta info for a full
        snapshot.

        `excluded_bases` is an optional set of revision that cannot be used as
        a delta base. Use this to recompute delta suitable in censor or strip
        context.
        """
        if target_rev is None:
            target_rev = len(self.revlog)

        if not revinfo.textlen:
            # empty revision: store a (trivial) full snapshot
            return self._fullsnapshotinfo(fh, revinfo, target_rev)

        if excluded_bases is None:
            excluded_bases = set()

        # no delta for flag processor revision (see "candelta" for why)
        # not calling candelta since only one revision needs test, also to
        # avoid overhead fetching flags again.
        if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
            return self._fullsnapshotinfo(fh, revinfo, target_rev)

        gather_debug = (
            self._write_debug is not None or self._debug_info is not None
        )
        debug_search = self._write_debug is not None and self._debug_search

        if gather_debug:
            start = util.timer()

        # count the number of different delta we tried (for debug purpose)
        dbg_try_count = 0
        # count the number of "search round" we did. (for debug purpose)
        dbg_try_rounds = 0
        dbg_type = b'unknown'

        cachedelta = revinfo.cachedelta
        p1 = revinfo.p1
        p2 = revinfo.p2
        revlog = self.revlog

        deltainfo = None
        p1r, p2r = revlog.rev(p1), revlog.rev(p2)

        if gather_debug:
            if p1r != nullrev:
                p1_chain_len = revlog._chaininfo(p1r)[0]
            else:
                p1_chain_len = -1
            if p2r != nullrev:
                p2_chain_len = revlog._chaininfo(p2r)[0]
            else:
                p2_chain_len = -1
        if debug_search:
            msg = b"DBG-DELTAS-SEARCH: SEARCH rev=%d\n"
            msg %= target_rev
            self._write_debug(msg)

        # ``groups`` is a generator: ``next``/``send`` drive the refinement
        # rounds, feeding back the currently best base (see the send() below)
        groups = _candidategroups(
            self.revlog,
            revinfo.textlen,
            p1r,
            p2r,
            cachedelta,
            excluded_bases,
            target_rev,
        )
        candidaterevs = next(groups)
        while candidaterevs is not None:
            dbg_try_rounds += 1
            if debug_search:
                prev = None
                if deltainfo is not None:
                    prev = deltainfo.base

                # classify the round purely for debug output
                if (
                    cachedelta is not None
                    and len(candidaterevs) == 1
                    and cachedelta[0] in candidaterevs
                ):
                    round_type = b"cached-delta"
                elif p1 in candidaterevs or p2 in candidaterevs:
                    round_type = b"parents"
                elif prev is not None and all(c < prev for c in candidaterevs):
                    round_type = b"refine-down"
                elif prev is not None and all(c > prev for c in candidaterevs):
                    round_type = b"refine-up"
                else:
                    round_type = b"search-down"
                msg = b"DBG-DELTAS-SEARCH: ROUND #%d - %d candidates - %s\n"
                msg %= (dbg_try_rounds, len(candidaterevs), round_type)
                self._write_debug(msg)
            nominateddeltas = []
            if deltainfo is not None:
                if debug_search:
                    msg = (
                        b"DBG-DELTAS-SEARCH: CONTENDER: rev=%d - length=%d\n"
                    )
                    msg %= (deltainfo.base, deltainfo.deltalen)
                    self._write_debug(msg)
                # if we already found a good delta,
                # challenge it against refined candidates
                nominateddeltas.append(deltainfo)
            for candidaterev in candidaterevs:
                if debug_search:
                    msg = b"DBG-DELTAS-SEARCH: CANDIDATE: rev=%d\n"
                    msg %= candidaterev
                    self._write_debug(msg)
                    candidate_type = None
                    if candidaterev == p1:
                        candidate_type = b"p1"
                    elif candidaterev == p2:
                        candidate_type = b"p2"
                    elif self.revlog.issnapshot(candidaterev):
                        candidate_type = b"snapshot-%d"
                        candidate_type %= self.revlog.snapshotdepth(
                            candidaterev
                        )

                    if candidate_type is not None:
                        msg = b"DBG-DELTAS-SEARCH: type=%s\n"
                        msg %= candidate_type
                        self._write_debug(msg)
                    msg = b"DBG-DELTAS-SEARCH: size=%d\n"
                    msg %= self.revlog.length(candidaterev)
                    self._write_debug(msg)
                    msg = b"DBG-DELTAS-SEARCH: base=%d\n"
                    msg %= self.revlog.deltaparent(candidaterev)
                    self._write_debug(msg)

                dbg_try_count += 1

                if debug_search:
                    delta_start = util.timer()
                candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
                if debug_search:
                    delta_end = util.timer()
                    msg = b"DBG-DELTAS-SEARCH: delta-search-time=%f\n"
                    msg %= delta_end - delta_start
                    self._write_debug(msg)
                if candidatedelta is not None:
                    if is_good_delta_info(self.revlog, candidatedelta, revinfo):
                        if debug_search:
                            msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (GOOD)\n"
                            msg %= candidatedelta.deltalen
                            self._write_debug(msg)
                        nominateddeltas.append(candidatedelta)
                    elif debug_search:
                        msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (BAD)\n"
                        msg %= candidatedelta.deltalen
                        self._write_debug(msg)
                elif debug_search:
                    msg = b"DBG-DELTAS-SEARCH: NO-DELTA\n"
                    self._write_debug(msg)
            if nominateddeltas:
                # keep the smallest of the current contender and new candidates
                deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
            if deltainfo is not None:
                candidaterevs = groups.send(deltainfo.base)
            else:
                candidaterevs = next(groups)

        if deltainfo is None:
            # no acceptable delta base found: fall back to a full snapshot
            dbg_type = b"full"
            deltainfo = self._fullsnapshotinfo(fh, revinfo, target_rev)
        elif deltainfo.snapshotdepth:  # pytype: disable=attribute-error
            dbg_type = b"snapshot"
        else:
            dbg_type = b"delta"

        if gather_debug:
            end = util.timer()
            used_cached = (
                cachedelta is not None
                and dbg_try_rounds == 1
                and dbg_try_count == 1
                and deltainfo.base == cachedelta[0]
            )
            dbg = {
                'duration': end - start,
                'revision': target_rev,
                'delta-base': deltainfo.base,  # pytype: disable=attribute-error
                'search_round_count': dbg_try_rounds,
                'using-cached-base': used_cached,
                'delta_try_count': dbg_try_count,
                'type': dbg_type,
                'p1-chain-len': p1_chain_len,
                'p2-chain-len': p2_chain_len,
            }
            if (
                deltainfo.snapshotdepth  # pytype: disable=attribute-error
                is not None
            ):
                dbg[
                    'snapshot-depth'
                ] = deltainfo.snapshotdepth  # pytype: disable=attribute-error
            else:
                dbg['snapshot-depth'] = 0
            target_revlog = b"UNKNOWN"
            target_type = self.revlog.target[0]
            target_key = self.revlog.target[1]
            if target_type == KIND_CHANGELOG:
                target_revlog = b'CHANGELOG:'
            elif target_type == KIND_MANIFESTLOG:
                target_revlog = b'MANIFESTLOG:'
                if target_key:
                    target_revlog += b'%s:' % target_key
            elif target_type == KIND_FILELOG:
                target_revlog = b'FILELOG:'
                if target_key:
                    target_revlog += b'%s:' % target_key
            dbg['target-revlog'] = target_revlog

            if self._debug_info is not None:
                self._debug_info.append(dbg)

            if self._write_debug is not None:
                msg = (
                    b"DBG-DELTAS:"
                    b" %-12s"
                    b" rev=%d:"
                    b" delta-base=%d"
                    b" is-cached=%d"
                    b" - search-rounds=%d"
                    b" try-count=%d"
                    b" - delta-type=%-6s"
                    b" snap-depth=%d"
                    b" - p1-chain-length=%d"
                    b" p2-chain-length=%d"
                    b" - duration=%f"
                    b"\n"
                )
                msg %= (
                    dbg["target-revlog"],
                    dbg["revision"],
                    dbg["delta-base"],
                    dbg["using-cached-base"],
                    dbg["search_round_count"],
                    dbg["delta_try_count"],
                    dbg["type"],
                    dbg["snapshot-depth"],
                    dbg["p1-chain-len"],
                    dbg["p2-chain-len"],
                    dbg["duration"],
                )
                self._write_debug(msg)
        return deltainfo
1447
1448
1448
1449
def delta_compression(default_compression_header, deltainfo):
    """return (COMPRESSION_MODE, deltainfo)

    used by revlog v2+ format to dispatch between PLAIN and DEFAULT
    compression.

    default_compression_header: the one-byte header emitted by the revlog's
        default compression engine.
    deltainfo: the _deltainfo whose ``data`` (header, data) pair is examined;
        it may be replaced when the legacy b'u' marker is dropped.
    """
    h, d = deltainfo.data
    compression_mode = COMP_MODE_INLINE
    if not h and not d:
        # no data to store at all... declare them uncompressed
        compression_mode = COMP_MODE_PLAIN
    elif not h:
        t = d[0:1]
        if t == b'\0':
            compression_mode = COMP_MODE_PLAIN
        elif t == default_compression_header:
            # presumably compressed by the default engine, so the leading
            # marker byte is redundant — TODO confirm against callers
            compression_mode = COMP_MODE_DEFAULT
    elif h == b'u':
        # we have a more efficient way to declare uncompressed
        # (note: the original also rebound ``h = b''`` here, but ``h`` is
        # never read afterwards, so the dead assignment was removed)
        compression_mode = COMP_MODE_PLAIN
        deltainfo = drop_u_compression(deltainfo)
    return compression_mode, deltainfo
General Comments 0
You need to be logged in to leave comments. Login now