##// END OF EJS Templates
delta-find: use a single snapshot cache when applying a group to an object...
marmoute -
r50576:c261a628 default
parent child Browse files
Show More
@@ -1,1472 +1,1476 b''
1 # revlogdeltas.py - Logic around delta computation for revlog
1 # revlogdeltas.py - Logic around delta computation for revlog
2 #
2 #
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 # Copyright 2018 Octobus <contact@octobus.net>
4 # Copyright 2018 Octobus <contact@octobus.net>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8 """Helper class to compute deltas stored inside revlogs"""
8 """Helper class to compute deltas stored inside revlogs"""
9
9
10
10
11 import collections
11 import collections
12 import struct
12 import struct
13
13
14 # import stuff from node for others to import from revlog
14 # import stuff from node for others to import from revlog
15 from ..node import nullrev
15 from ..node import nullrev
16 from ..i18n import _
16 from ..i18n import _
17 from ..pycompat import getattr
17 from ..pycompat import getattr
18
18
19 from .constants import (
19 from .constants import (
20 COMP_MODE_DEFAULT,
20 COMP_MODE_DEFAULT,
21 COMP_MODE_INLINE,
21 COMP_MODE_INLINE,
22 COMP_MODE_PLAIN,
22 COMP_MODE_PLAIN,
23 DELTA_BASE_REUSE_NO,
23 DELTA_BASE_REUSE_NO,
24 KIND_CHANGELOG,
24 KIND_CHANGELOG,
25 KIND_FILELOG,
25 KIND_FILELOG,
26 KIND_MANIFESTLOG,
26 KIND_MANIFESTLOG,
27 REVIDX_ISCENSORED,
27 REVIDX_ISCENSORED,
28 REVIDX_RAWTEXT_CHANGING_FLAGS,
28 REVIDX_RAWTEXT_CHANGING_FLAGS,
29 )
29 )
30
30
31 from ..thirdparty import attr
31 from ..thirdparty import attr
32
32
33 from .. import (
33 from .. import (
34 error,
34 error,
35 mdiff,
35 mdiff,
36 util,
36 util,
37 )
37 )
38
38
39 from . import flagutil
39 from . import flagutil
40
40
41 # maximum <delta-chain-data>/<revision-text-length> ratio
41 # maximum <delta-chain-data>/<revision-text-length> ratio
42 LIMIT_DELTA2TEXT = 2
42 LIMIT_DELTA2TEXT = 2
43
43
44
44
class _testrevlog:
    """Minimalist in-memory stand-in for a revlog, used by the doctests.

    ``data`` holds cumulative payload boundaries: entry ``i`` is the end
    offset of revision ``i``, so revision sizes are the differences between
    consecutive entries.
    """

    def __init__(self, data, density=0.5, mingap=0, snapshot=()):
        """data is an list of revision payload boundaries"""
        self._data = data
        self._srdensitythreshold = density
        self._srmingapsize = mingap
        self._snapshot = set(snapshot)
        self.index = None

    def start(self, rev):
        # nullrev and the first revision both start at offset zero
        if rev == nullrev or rev == 0:
            return 0
        return self._data[rev - 1]

    def end(self, rev):
        if rev == nullrev:
            return 0
        return self._data[rev]

    def length(self, rev):
        return self.end(rev) - self.start(rev)

    def __len__(self):
        return len(self._data)

    def issnapshot(self, rev):
        # nullrev is always treated as a (virtual, empty) snapshot
        return rev == nullrev or rev in self._snapshot
78
78
79
79
def slicechunk(revlog, revs, targetsize=None):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `revlog._srdensitythreshold`. No gap smaller than
    `revlog._srmingapsize` is skipped.

    If `targetsize` is set, no chunk larger than `targetsize` will be yield.
    For consistency with other slicing choice, this limit won't go lower than
    `revlog._srmingapsize`.

    If individual revisions chunk are larger than this limit, they will still
    be raised individually.

    >>> data = [
    ...     5,  #00 (5)
    ...     10, #01 (5)
    ...     12, #02 (2)
    ...     12, #03 (empty)
    ...     27, #04 (15)
    ...     31, #05 (4)
    ...     31, #06 (empty)
    ...     42, #07 (11)
    ...     47, #08 (5)
    ...     47, #09 (empty)
    ...     48, #10 (1)
    ...     51, #11 (3)
    ...     74, #12 (23)
    ...     85, #13 (11)
    ...     86, #14 (1)
    ...     91, #15 (5)
    ... ]
    >>> revlog = _testrevlog(data, snapshot=range(16))

    >>> list(slicechunk(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(slicechunk(revlog, [0, 15]))
    [[0], [15]]
    >>> list(slicechunk(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]

    Slicing with a maximum chunk size
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
    [[0], [11], [13], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
    [[0], [11], [13, 15]]

    Slicing involving nullrev
    >>> list(slicechunk(revlog, [-1, 0, 11, 13, 15], targetsize=20))
    [[-1, 0], [11], [13, 15]]
    >>> list(slicechunk(revlog, [-1, 13, 15], targetsize=5))
    [[-1], [13], [15]]
    """
    if targetsize is not None:
        # never slice below the minimal gap size the revlog is tuned for
        targetsize = max(targetsize, revlog._srmingapsize)
    # targetsize should not be specified when evaluating delta candidates:
    # * targetsize is used to ensure we stay within specification when reading,
    densityslicing = getattr(revlog.index, 'slicechunktodensity', None)
    if densityslicing is None:
        # the index may provide its own implementation; otherwise fall back
        # to the pure Python one defined in this module
        def densityslicing(relevant, density, mingap):
            return _slicechunktodensity(revlog, relevant, density, mingap)

    density_chunks = densityslicing(
        revs, revlog._srdensitythreshold, revlog._srmingapsize
    )
    for chunk in density_chunks:
        # each density-based chunk may still exceed the read-size target
        yield from _slicechunktosize(revlog, chunk, targetsize)
152
152
153
153
def _slicechunktosize(revlog, revs, targetsize=None):
    """slice revs to match the target size

    This is intended to be used on chunk that density slicing selected by that
    are still too large compared to the read garantee of revlog. This might
    happens when "minimal gap size" interrupted the slicing or when chain are
    built in a way that create large blocks next to each other.

    >>> data = [
    ...     3,  #0 (3)
    ...     5,  #1 (2)
    ...     6,  #2 (1)
    ...     8,  #3 (2)
    ...     8,  #4 (empty)
    ...     11, #5 (3)
    ...     12, #6 (1)
    ...     13, #7 (1)
    ...     14, #8 (1)
    ... ]

    == All snapshots cases ==
    >>> revlog = _testrevlog(data, snapshot=range(9))

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1, 2], [3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]

    == No Snapshot cases ==
    >>> revlog = _testrevlog(data)

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1], [2, 3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [4, 5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]

    == mixed case ==
    >>> revlog = _testrevlog(data, snapshot=[0, 1, 2])
    >>> list(_slicechunktosize(revlog, list(range(9)), 5))
    [[0, 1], [2], [3, 4, 5], [6, 7, 8]]
    """
    assert targetsize is None or 0 <= targetsize
    chunk_start = revlog.start(revs[0])
    data_end = revlog.end(revs[-1])
    if targetsize is None or (data_end - chunk_start) <= targetsize:
        # the whole span already fits within the limit, nothing to do
        yield revs
        return

    first_idx = 0
    stop_idx = 1
    walker = enumerate(revs)
    next(walker)  # skip first rev.
    # first step: get snapshots out of the way
    for pos, rev in walker:
        read_span = revlog.end(rev) - chunk_start
        is_snap = revlog.issnapshot(rev)
        if read_span <= targetsize and is_snap:
            # still within budget, extend the current chunk
            stop_idx = pos + 1
        else:
            emitted = _trimchunk(revlog, revs, first_idx, stop_idx)
            if emitted:
                yield emitted
            first_idx = pos
            chunk_start = revlog.start(rev)
            stop_idx = pos + 1
        if not is_snap:
            # snapshots only live at the head of the chunk; stop phase one
            break

    # for the others, we use binary slicing to quickly converge toward valid
    # chunks (otherwise, we might end up looking for start/end of many
    # revisions). This logic is not looking for the perfect slicing point, it
    # focuses on quickly converging toward valid chunks.
    total = len(revs)
    while (data_end - chunk_start) > targetsize:
        stop_idx = total
        if total - first_idx <= 1:
            break  # protect against individual chunk larger than limit
        probe_end = revlog.end(revs[stop_idx - 1])
        read_span = probe_end - chunk_start
        while read_span > targetsize:
            if stop_idx - first_idx <= 1:
                break  # protect against individual chunk larger than limit
            # halve the candidate range until the span fits
            stop_idx -= (stop_idx - first_idx) // 2
            probe_end = revlog.end(revs[stop_idx - 1])
            read_span = probe_end - chunk_start
        emitted = _trimchunk(revlog, revs, first_idx, stop_idx)
        if emitted:
            yield emitted
        first_idx = stop_idx
        chunk_start = revlog.start(revs[first_idx])

    emitted = _trimchunk(revlog, revs, first_idx)
    if emitted:
        yield emitted
307
307
308
308
def _slicechunktodensity(revlog, revs, targetdensity=0.5, mingapsize=0):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
    skipped.

    >>> revlog = _testrevlog([
    ...     5,  #00 (5)
    ...     10, #01 (5)
    ...     12, #02 (2)
    ...     12, #03 (empty)
    ...     27, #04 (15)
    ...     31, #05 (4)
    ...     31, #06 (empty)
    ...     42, #07 (11)
    ...     47, #08 (5)
    ...     47, #09 (empty)
    ...     48, #10 (1)
    ...     51, #11 (3)
    ...     74, #12 (23)
    ...     85, #13 (11)
    ...     86, #14 (1)
    ...     91, #15 (5)
    ... ])

    >>> list(_slicechunktodensity(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(_slicechunktodensity(revlog, [0, 15]))
    [[0], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           mingapsize=20))
    [[1, 2, 3, 5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95))
    [[1, 2], [5], [8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95, mingapsize=12))
    [[1, 2], [5, 8, 10, 11], [14]]
    """
    start = revlog.start
    length = revlog.length

    if len(revs) <= 1:
        # a single revision cannot be sliced any further
        yield revs
        return

    deltachainspan = segmentspan(revlog, revs)

    if deltachainspan < mingapsize:
        # the whole read is below the minimal gap size, never slice it
        yield revs
        return

    read_bytes = deltachainspan
    payload = sum(length(r) for r in revs)

    if deltachainspan:
        density = payload / float(deltachainspan)
    else:
        density = 1.0

    if density >= targetdensity:
        # already dense enough, no slicing needed
        yield revs
        return

    # record every hole between consecutive non-empty revisions
    gaps = []
    prev_end = None
    for i, rev in enumerate(revs):
        rev_start = start(rev)
        rev_len = length(rev)

        # Skip empty revisions to form larger holes
        if rev_len == 0:
            continue

        if prev_end is not None:
            hole = rev_start - prev_end
            # only consider holes that are large enough
            if hole > mingapsize:
                gaps.append((hole, i))

        prev_end = rev_start + rev_len
    # sort ascending so popping from the end goes largest-to-smallest
    gaps.sort()

    # Collect the indices of the largest holes until the density is acceptable
    selected = []
    while gaps and density < targetdensity:
        hole, gap_idx = gaps.pop()

        selected.append(gap_idx)

        # skipping this hole shrinks the amount of data actually read
        read_bytes -= hole
        if read_bytes > 0:
            density = payload / float(read_bytes)
        else:
            density = 1.0
    selected.sort()

    # Cut the revs at collected indices
    prev_idx = 0
    for cut in selected:
        piece = _trimchunk(revlog, revs, prev_idx, cut)
        if piece:
            yield piece

        prev_idx = cut

    piece = _trimchunk(revlog, revs, prev_idx)
    if piece:
        yield piece
433
433
434
434
435 def _trimchunk(revlog, revs, startidx, endidx=None):
435 def _trimchunk(revlog, revs, startidx, endidx=None):
436 """returns revs[startidx:endidx] without empty trailing revs
436 """returns revs[startidx:endidx] without empty trailing revs
437
437
438 Doctest Setup
438 Doctest Setup
439 >>> revlog = _testrevlog([
439 >>> revlog = _testrevlog([
440 ... 5, #0
440 ... 5, #0
441 ... 10, #1
441 ... 10, #1
442 ... 12, #2
442 ... 12, #2
443 ... 12, #3 (empty)
443 ... 12, #3 (empty)
444 ... 17, #4
444 ... 17, #4
445 ... 21, #5
445 ... 21, #5
446 ... 21, #6 (empty)
446 ... 21, #6 (empty)
447 ... ])
447 ... ])
448
448
449 Contiguous cases:
449 Contiguous cases:
450 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
450 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
451 [0, 1, 2, 3, 4, 5]
451 [0, 1, 2, 3, 4, 5]
452 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
452 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
453 [0, 1, 2, 3, 4]
453 [0, 1, 2, 3, 4]
454 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
454 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
455 [0, 1, 2]
455 [0, 1, 2]
456 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
456 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
457 [2]
457 [2]
458 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
458 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
459 [3, 4, 5]
459 [3, 4, 5]
460 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
460 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
461 [3, 4]
461 [3, 4]
462
462
463 Discontiguous cases:
463 Discontiguous cases:
464 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
464 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
465 [1, 3, 5]
465 [1, 3, 5]
466 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
466 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
467 [1]
467 [1]
468 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
468 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
469 [3, 5]
469 [3, 5]
470 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
470 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
471 [3, 5]
471 [3, 5]
472 """
472 """
473 length = revlog.length
473 length = revlog.length
474
474
475 if endidx is None:
475 if endidx is None:
476 endidx = len(revs)
476 endidx = len(revs)
477
477
478 # If we have a non-emtpy delta candidate, there are nothing to trim
478 # If we have a non-emtpy delta candidate, there are nothing to trim
479 if revs[endidx - 1] < len(revlog):
479 if revs[endidx - 1] < len(revlog):
480 # Trim empty revs at the end, except the very first revision of a chain
480 # Trim empty revs at the end, except the very first revision of a chain
481 while (
481 while (
482 endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0
482 endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0
483 ):
483 ):
484 endidx -= 1
484 endidx -= 1
485
485
486 return revs[startidx:endidx]
486 return revs[startidx:endidx]
487
487
488
488
def segmentspan(revlog, revs):
    """Get the byte span of a segment of revisions

    revs is a sorted array of revision numbers

    >>> revlog = _testrevlog([
    ...     5,  #0
    ...     10, #1
    ...     12, #2
    ...     12, #3 (empty)
    ...     17, #4
    ... ])

    >>> segmentspan(revlog, [0, 1, 2, 3, 4])
    17
    >>> segmentspan(revlog, [0, 4])
    17
    >>> segmentspan(revlog, [3, 4])
    5
    >>> segmentspan(revlog, [1, 2, 3,])
    7
    >>> segmentspan(revlog, [1, 3])
    7
    """
    if not revs:
        return 0
    # distance from the start of the first revision to the end of the last
    return revlog.end(revs[-1]) - revlog.start(revs[0])
517
517
518
518
def _textfromdelta(fh, revlog, baserev, delta, p1, p2, flags, expectednode):
    """build full text from a (base, delta) pair and other metadata"""
    # special case deltas which replace entire base; no need to decode
    # base revision. this neatly avoids censored bases, which throw when
    # they're decoded.
    header_len = struct.calcsize(b">lll")
    full_replace = mdiff.replacediffheader(
        revlog.rawsize(baserev), len(delta) - header_len
    )
    if delta[:header_len] == full_replace:
        fulltext = delta[header_len:]
    else:
        # deltabase is rawtext before changed by flag processors, which is
        # equivalent to non-raw text
        basetext = revlog.revision(baserev, _df=fh)
        fulltext = mdiff.patch(basetext, delta)

    try:
        validatehash = flagutil.processflagsraw(revlog, fulltext, flags)
        if validatehash:
            revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
        if flags & REVIDX_ISCENSORED:
            raise error.StorageError(
                _(b'node %s is not censored') % expectednode
            )
    except error.CensoredNodeError:
        # must pass the censored index flag to add censored revisions
        if not flags & REVIDX_ISCENSORED:
            raise
    return fulltext
548
548
549
549
@attr.s(slots=True, frozen=True)
class _deltainfo:
    # Immutable record describing a candidate delta for a revision.
    #
    # distance from the delta base to the target revision's start; bounds
    # the amount of revlog data a reader may have to span (I/O cost)
    distance = attr.ib()
    # size of the delta payload as stored
    deltalen = attr.ib()
    # (compression-header, payload) pair; a b'u' header means the payload
    # is stored uncompressed (see drop_u_compression)
    data = attr.ib()
    # revision number used as the delta base
    base = attr.ib()
    # first revision of the resulting delta chain
    chainbase = attr.ib()
    # number of deltas in the resulting chain
    chainlen = attr.ib()
    # cumulated compressed size of all deltas in the chain (CPU cost to
    # restore the full text)
    compresseddeltalen = attr.ib()
    # snapshot depth if this delta would be an intermediate snapshot,
    # None otherwise
    snapshotdepth = attr.ib()
560
560
561
561
def drop_u_compression(delta):
    """Strip the b'u' (no-compression) marker from a delta's payload.

    Returns a new ``_deltainfo`` whose data carries an empty compression
    header instead of the explicit b'u' marker, shrinking the stored
    length by one byte. This is useful for revlog formats that have a
    better compression method available.
    """
    assert delta.data[0] == b'u', delta.data[0]
    return _deltainfo(
        distance=delta.distance,
        deltalen=delta.deltalen - 1,
        data=(b'', delta.data[1]),
        base=delta.base,
        chainbase=delta.chainbase,
        chainlen=delta.chainlen,
        compresseddeltalen=delta.compresseddeltalen,
        snapshotdepth=delta.snapshotdepth,
    )
578
578
579
579
def is_good_delta_info(revlog, deltainfo, revinfo):
    """Tell whether ``deltainfo`` is an acceptable delta for ``revinfo``.

    "Good" means the delta stays within the disk-span, size and
    chain-length bounds that we know keep read performance reasonable.
    Returns False for a None ``deltainfo``.
    """
    if deltainfo is None:
        return False

    # - 'deltainfo.distance' bounds the amount of I/O needed (distance
    #   from the base revision on disk).
    # - 'deltainfo.compresseddeltalen' bounds the CPU spent (total size
    #   of the deltas that must be applied).
    textlen = revinfo.textlen

    # Compute the maximum acceptable read span. When no explicit limit is
    # configured, fall back to the candidate's own distance so the span
    # check below always passes.
    max_span = revlog._maxdeltachainspan
    if not max_span:
        max_span = deltainfo.distance
    max_span = max(max_span, textlen * 4)

    # Bad delta from read span:
    #
    # The span of data read would exceed the maximum allowed. Sparse
    # revlogs rely on "sparse reading" instead, so the span check is
    # skipped entirely there: pathological layouts are possible in theory
    # but do not occur in practice.
    if not revlog._sparserevlog and max_span < deltainfo.distance:
        return False

    # Bad delta from new delta size:
    #
    # Storing a delta larger than the target full text is inefficient.
    if textlen < deltainfo.deltalen:
        return False

    # Bad delta from cumulated payload size:
    #
    # The chain's total delta size exceeds K * target text length.
    if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
        return False

    # Bad delta from chain length:
    #
    # Too many deltas in the chain.
    if revlog._maxchainlen and revlog._maxchainlen < deltainfo.chainlen:
        return False

    # Bad delta from intermediate snapshot size limit:
    #
    # The limit exists to prevent endless chains of intermediate deltas
    # from being created.
    depth = deltainfo.snapshotdepth
    if depth is not None and (textlen >> depth) < deltainfo.deltalen:
        return False

    # Bad delta if the new intermediate snapshot would be larger than the
    # previous snapshot it builds upon.
    if depth and revlog.length(deltainfo.base) < deltainfo.deltalen:
        return False

    return True
651
651
652
652
# If a revision's full text is that much bigger than a base candidate full
# text's, it is very unlikely that it will produce a valid delta. We no longer
# consider these candidates.
# (Used in _candidategroups: a base whose rawsize is more than 500x smaller
# than the incoming text is skipped.)
LIMIT_BASE2TEXT = 500
657
657
658
658
def _candidategroups(
    revlog,
    textlen,
    p1,
    p2,
    cachedelta,
    excluded_bases=None,
    target_rev=None,
    snapshot_cache=None,
):
    """Provides groups of revisions to be tested as delta base.

    This top level function focuses on emitting groups with unique and
    worthwhile content. See _rawgroups for details about the group order.

    This is a generator-based coroutine: each yielded tuple is a group of
    candidate base revisions, and the caller sends back (via ``send()``)
    the revision it selected from that group, or None. A final None is
    yielded when no candidates remain.

    :revlog:         the revlog we are computing a delta for
    :textlen:        length of the full text being stored
    :p1, p2:         revision numbers of the target revision's parents
    :cachedelta:     optional pre-computed delta hint; only consulted here
                     indirectly through _refinedgroups
    :excluded_bases: optional container of revisions that must never be
                     used as a base (e.g. censored revisions)
    :target_rev:     revision number the delta is being (re)computed for;
                     candidates >= target_rev are skipped
    :snapshot_cache: optional SnapshotCache shared across calls, so a
                     group application reuses a single snapshot scan
    """
    # should we try to build a delta?
    if not (len(revlog) and revlog._storedeltachains):
        yield None
        return

    # bind frequently-used revlog methods to locals
    deltalength = revlog.length
    deltaparent = revlog.deltaparent
    sparse = revlog._sparserevlog
    good = None

    deltas_limit = textlen * LIMIT_DELTA2TEXT
    group_chunk_size = revlog._candidate_group_chunk_size

    # nullrev is handled as a last resort, never as a group member
    tested = {nullrev}
    candidates = _refinedgroups(
        revlog,
        p1,
        p2,
        cachedelta,
        snapshot_cache=snapshot_cache,
    )
    while True:
        temptative = candidates.send(good)
        if temptative is None:
            break
        group = []
        for rev in temptative:
            # skip over empty deltas (no need to include them in a chain)
            while revlog._generaldelta and not (
                rev == nullrev or rev in tested or deltalength(rev)
            ):
                tested.add(rev)
                rev = deltaparent(rev)
            # no need to try a delta against nullrev, this will be done as
            # a last resort.
            if rev == nullrev:
                continue
            # filter out revisions we already tested
            if rev in tested:
                continue
            # a higher authority deemed the base unworthy (e.g. censored)
            if excluded_bases is not None and rev in excluded_bases:
                tested.add(rev)
                continue
            # We are in some recomputation cases and that rev is too high
            # in the revlog
            if target_rev is not None and rev >= target_rev:
                tested.add(rev)
                continue
            # filter out delta bases that can never produce a good delta
            if deltas_limit < revlog.length(rev):
                tested.add(rev)
                continue
            # a base vastly smaller than the text is unlikely to delta well
            if sparse and revlog.rawsize(rev) < (textlen // LIMIT_BASE2TEXT):
                tested.add(rev)
                continue
            # no delta for rawtext-changing revs (see "candelta" for why)
            if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
                tested.add(rev)
                continue

            # If we reach here, we are about to build and test a delta.
            # The delta building process will compute the chaininfo in all
            # cases; since that computation is cached, it is fine to access
            # it here too.
            chainlen, chainsize = revlog._chaininfo(rev)
            # if the chain would become too long, skip this base
            if revlog._maxchainlen and chainlen >= revlog._maxchainlen:
                tested.add(rev)
                continue
            # if the chain already holds too much data, skip this base
            if deltas_limit < chainsize:
                tested.add(rev)
                continue
            if sparse and revlog.upperboundcomp is not None:
                maxcomp = revlog.upperboundcomp
                basenotsnap = (p1, p2, nullrev)
                if rev not in basenotsnap and revlog.issnapshot(rev):
                    snapshotdepth = revlog.snapshotdepth(rev)
                    # If the text is significantly larger than the base, we
                    # can expect the resulting delta to be proportional to
                    # the size difference
                    revsize = revlog.rawsize(rev)
                    rawsizedistance = max(textlen - revsize, 0)
                    # use an estimate of the compression upper bound.
                    lowestrealisticdeltalen = rawsizedistance // maxcomp

                    # check the absolute constraint on the delta size
                    snapshotlimit = textlen >> snapshotdepth
                    if snapshotlimit < lowestrealisticdeltalen:
                        # delta lower bound is larger than accepted upper
                        # bound
                        tested.add(rev)
                        continue

                    # check the relative constraint on the delta size
                    revlength = revlog.length(rev)
                    if revlength < lowestrealisticdeltalen:
                        # delta probable lower bound is larger than target
                        # base
                        tested.add(rev)
                        continue

            group.append(rev)
        if group:
            # When the size of the candidate group is big, it can result in
            # a quite significant performance impact. To reduce this, we
            # can send them in smaller batches until the new batch does not
            # provide any improvements.
            #
            # This might reduce the overall efficiency of the compression
            # in some corner cases, but that should also prevent very
            # pathological cases from being an issue. (eg. 20 000
            # candidates).
            #
            # XXX note that the ordering of the group becomes important as
            # it now impacts the final result. The current order is
            # unprocessed and can be improved.
            if group_chunk_size == 0:
                tested.update(group)
                good = yield tuple(group)
            else:
                prev_good = good
                for start in range(0, len(group), group_chunk_size):
                    sub_group = group[start : start + group_chunk_size]
                    tested.update(sub_group)
                    good = yield tuple(sub_group)
                    # stop chunking once a batch brought no improvement
                    if prev_good == good:
                        break

    yield None
800
802
801
803
def _refinedgroups(revlog, p1, p2, cachedelta, snapshot_cache=None):
    """Coroutine yielding refined groups of candidate delta bases.

    Drives _rawgroups and then, for sparse revlogs, refines the caller's
    pick: the caller sends back the selected revision ("good") through
    ``send()`` after each yielded group. A final None terminates the
    stream.

    ``cachedelta`` is a hint received with the incoming data; when its
    reuse policy allows it, the cached base is offered first and, if
    accepted, no further candidates are produced.

    ``snapshot_cache`` lets callers share a single SnapshotCache across
    several revisions (it is created lazily when not provided).
    """
    good = None
    # First we try to reuse the delta contained in the bundle.
    # (or from the source revlog)
    #
    # This logic only applies to general delta repositories and can be
    # disabled through configuration. Disabling reuse of the source delta
    # is useful when we want to make sure we recomputed "optimal" deltas.
    debug_info = None
    if cachedelta is not None and cachedelta[2] > DELTA_BASE_REUSE_NO:
        # Assume what we received from the server is a good choice;
        # build delta will reuse the cache
        if debug_info is not None:
            debug_info['cached-delta.tested'] += 1
        good = yield (cachedelta[0],)
        if good is not None:
            if debug_info is not None:
                debug_info['cached-delta.accepted'] += 1
            yield None
            return
    # create a local cache only when the caller did not provide a shared one
    if snapshot_cache is None:
        snapshot_cache = SnapshotCache()
    groups = _rawgroups(
        revlog,
        p1,
        p2,
        cachedelta,
        snapshot_cache,
    )
    for candidates in groups:
        good = yield candidates
        if good is not None:
            break

    # If sparse revlog is enabled, we can try to refine the available deltas
    if not revlog._sparserevlog:
        yield None
        return

    # if we have a refinable value, try to refine it
    if good is not None and good not in (p1, p2) and revlog.issnapshot(good):
        # refine snapshot down: walk toward the root of the snapshot chain
        previous = None
        while previous != good:
            previous = good
            base = revlog.deltaparent(good)
            if base == nullrev:
                break
            good = yield (base,)
        # refine snapshot up: try the snapshots built on top of "good"
        if not snapshot_cache.snapshots:
            snapshot_cache.update(revlog, good + 1)
        previous = None
        while good != previous:
            previous = good
            children = tuple(sorted(c for c in snapshot_cache.snapshots[good]))
            good = yield children

    if debug_info is not None:
        if good is None:
            debug_info['no-solution'] += 1

    yield None
865
867
866
868
def _rawgroups(revlog, p1, p2, cachedelta, snapshot_cache=None):
    """Provides groups of revisions to be tested as delta base.

    This lower level function focuses on emitting deltas that are
    theoretically interesting, without looking at any practical details.

    The group order aims at providing fast or small candidates first:
    parents first, then (for sparse revlogs) snapshot siblings and the
    bases from the parents' snapshot chains, then full snapshots, and
    finally — for non-sparse revlogs only — the previous revision.
    """
    gdelta = revlog._generaldelta
    # gate sparse behind general-delta because of issue6056
    sparse = gdelta and revlog._sparserevlog
    curr = len(revlog)
    prev = curr - 1
    deltachain = lambda rev: revlog._deltachain(rev)[0]

    if gdelta:
        # exclude the null revision; an already-lazy-tested base if any
        parents = [p for p in (p1, p2) if p != nullrev]

        if not revlog._deltabothparents and len(parents) == 2:
            parents.sort()
            # To minimize the chance of having to build a fulltext,
            # pick first whichever parent is closest to us (max rev)
            yield (parents[1],)
            # then the other one (min rev) if the first did not fit
            yield (parents[0],)
        elif len(parents) > 0:
            # Test all parents (1 or 2), and keep the best candidate
            yield parents

    # NOTE: 'parents' is only bound when gdelta is true, but 'sparse'
    # implies gdelta, so this condition is safe.
    if sparse and parents:
        if snapshot_cache is None:
            # map: base-rev: [snapshot-revs]
            snapshot_cache = SnapshotCache()
        # See if we can use an existing snapshot in the parent chains to
        # use as a base for a new intermediate-snapshot
        #
        # search for snapshots in the parents' delta chains
        # map: snapshot-level: snapshot-rev
        parents_snaps = collections.defaultdict(set)
        candidate_chains = [deltachain(p) for p in parents]
        for chain in candidate_chains:
            for idx, s in enumerate(chain):
                if not revlog.issnapshot(s):
                    break
                parents_snaps[idx].add(s)
        snapfloor = min(parents_snaps[0]) + 1
        snapshot_cache.update(revlog, snapfloor)
        # search for the highest "unrelated" revision
        #
        # Adding snapshots used by "unrelated" revisions increases the odds
        # we reuse an independent, yet better, snapshot chain.
        #
        # XXX instead of building a set of revisions, we could lazily
        # enumerate over the chains. That would be more efficient, however
        # we stick to simple code for now.
        all_revs = set()
        for chain in candidate_chains:
            all_revs.update(chain)
        other = None
        for r in revlog.revs(prev, snapfloor):
            if r not in all_revs:
                other = r
                break
        if other is not None:
            # To avoid unfair competition, we won't use unrelated
            # intermediate snapshots that are deeper than the ones from the
            # parent delta chain.
            max_depth = max(parents_snaps.keys())
            chain = deltachain(other)
            for depth, s in enumerate(chain):
                if s < snapfloor:
                    continue
                if max_depth < depth:
                    break
                if not revlog.issnapshot(s):
                    break
                parents_snaps[depth].add(s)
        # Test them as possible intermediate snapshot bases.
        # We test them from highest to lowest level. High level ones are
        # more likely to result in small deltas.
        floor = None
        for idx, snaps in sorted(parents_snaps.items(), reverse=True):
            siblings = set()
            for s in snaps:
                siblings.update(snapshot_cache.snapshots[s])
            # Before considering making a new intermediate snapshot, we
            # check if an existing snapshot, child of a base we consider,
            # would be suitable.
            #
            # It gives a chance to reuse a delta chain "unrelated" to the
            # current revision instead of starting our own. Without such
            # re-use, topological branches would keep reopening new chains.
            # Creating more and more snapshots as the repository grows.

            if floor is not None:
                # We only do this for siblings created after the one in our
                # parent's delta chain. Those created before have fewer
                # chances to be valid bases since our ancestors had to
                # create a new snapshot.
                siblings = [r for r in siblings if floor < r]
            yield tuple(sorted(siblings))
            # then test the bases from our parents' delta chains.
            yield tuple(sorted(snaps))
            floor = min(snaps)
        # No suitable base found in the parent chain; search if any full
        # snapshots emitted since the parents' base would be a suitable
        # base for an intermediate snapshot.
        #
        # It gives a chance to reuse a delta chain unrelated to the current
        # revisions instead of starting our own. Without such re-use,
        # topological branches would keep reopening new full chains.
        # Creating more and more snapshots as the repository grows.
        full = [r for r in snapshot_cache.snapshots[nullrev] if snapfloor <= r]
        yield tuple(sorted(full))

    if not sparse:
        # other approaches failed: try against prev to hopefully save us a
        # fulltext.
        yield (prev,)
987
989
988
990
class SnapshotCache:
    """Cache of the snapshot revisions built on top of each base.

    ``snapshots`` maps base-rev -> set of snapshot revisions whose delta
    parent is that base. The cache also tracks the contiguous
    [_start_rev, _end_rev] range it has already scanned, so repeated
    updates only look at revisions not covered yet.
    """

    __slots__ = ('snapshots', '_start_rev', '_end_rev')

    def __init__(self):
        # base-rev -> {snapshot revisions using base-rev as delta parent}
        self.snapshots = collections.defaultdict(set)
        # inclusive bounds of the revision range already scanned
        self._start_rev = None
        self._end_rev = None

    def update(self, revlog, start_rev=0):
        """Scan ``revlog`` for snapshots between ``start_rev`` and tip."""
        end_rev = len(revlog) - 1
        if start_rev > end_rev:
            return  # nothing to scan, the requested range is empty

        if self._start_rev is None:
            # first scan: cover the whole requested range at once
            assert self._end_rev is None
            self._update(revlog, start_rev, end_rev)
        elif not (self._start_rev <= start_rev and end_rev <= self._end_rev):
            # extend the known range on whichever side(s) are missing
            if start_rev < self._start_rev:
                self._update(revlog, start_rev, self._start_rev - 1)
            if self._end_rev < end_rev:
                self._update(revlog, self._end_rev + 1, end_rev)

        # record the (possibly grown) scanned range
        if self._start_rev is None:
            assert self._end_rev is None
            self._end_rev = end_rev
            self._start_rev = start_rev
        else:
            self._start_rev = min(self._start_rev, start_rev)
            self._end_rev = max(self._end_rev, end_rev)
        assert self._start_rev <= self._end_rev, (
            self._start_rev,
            self._end_rev,
        )

    def _update(self, revlog, start_rev, end_rev):
        """Record snapshots in [start_rev, end_rev] (must not overlap the
        already-scanned range)."""
        assert self._start_rev is None or (
            start_rev < self._start_rev or start_rev > self._end_rev
        ), (self._start_rev, self._end_rev, start_rev, end_rev)
        assert self._start_rev is None or (
            end_rev < self._start_rev or end_rev > self._end_rev
        ), (self._start_rev, self._end_rev, start_rev, end_rev)
        found = self.snapshots
        if util.safehasattr(revlog.index, b'findsnapshots'):
            # fast path: let the native index perform the scan
            revlog.index.findsnapshots(found, start_rev, end_rev)
        else:
            deltaparent = revlog.deltaparent
            issnapshot = revlog.issnapshot
            for rev in revlog.revs(start_rev, end_rev):
                if issnapshot(rev):
                    found[deltaparent(rev)].add(rev)
1042
1044
1043
1045
class deltacomputer:
    """Object in charge of finding and building deltas for new revisions."""

    def __init__(
        self,
        revlog,
        write_debug=None,
        debug_search=False,
        debug_info=None,
    ):
        self.revlog = revlog
        # optional callable receiving debug messages (bytes)
        self._write_debug = write_debug
        # emit detailed search traces (only honored when write_debug is set)
        self._debug_search = debug_search
        # optional list collecting one debug dict per processed revision
        self._debug_info = debug_info
        # a single snapshot cache reused across finddeltainfo calls, so
        # applying a whole group of revisions shares the snapshot scan
        self._snapshot_cache = SnapshotCache()
1057 def buildtext(self, revinfo, fh):
1060 def buildtext(self, revinfo, fh):
1058 """Builds a fulltext version of a revision
1061 """Builds a fulltext version of a revision
1059
1062
1060 revinfo: revisioninfo instance that contains all needed info
1063 revinfo: revisioninfo instance that contains all needed info
1061 fh: file handle to either the .i or the .d revlog file,
1064 fh: file handle to either the .i or the .d revlog file,
1062 depending on whether it is inlined or not
1065 depending on whether it is inlined or not
1063 """
1066 """
1064 btext = revinfo.btext
1067 btext = revinfo.btext
1065 if btext[0] is not None:
1068 if btext[0] is not None:
1066 return btext[0]
1069 return btext[0]
1067
1070
1068 revlog = self.revlog
1071 revlog = self.revlog
1069 cachedelta = revinfo.cachedelta
1072 cachedelta = revinfo.cachedelta
1070 baserev = cachedelta[0]
1073 baserev = cachedelta[0]
1071 delta = cachedelta[1]
1074 delta = cachedelta[1]
1072
1075
1073 fulltext = btext[0] = _textfromdelta(
1076 fulltext = btext[0] = _textfromdelta(
1074 fh,
1077 fh,
1075 revlog,
1078 revlog,
1076 baserev,
1079 baserev,
1077 delta,
1080 delta,
1078 revinfo.p1,
1081 revinfo.p1,
1079 revinfo.p2,
1082 revinfo.p2,
1080 revinfo.flags,
1083 revinfo.flags,
1081 revinfo.node,
1084 revinfo.node,
1082 )
1085 )
1083 return fulltext
1086 return fulltext
1084
1087
1085 def _builddeltadiff(self, base, revinfo, fh):
1088 def _builddeltadiff(self, base, revinfo, fh):
1086 revlog = self.revlog
1089 revlog = self.revlog
1087 t = self.buildtext(revinfo, fh)
1090 t = self.buildtext(revinfo, fh)
1088 if revlog.iscensored(base):
1091 if revlog.iscensored(base):
1089 # deltas based on a censored revision must replace the
1092 # deltas based on a censored revision must replace the
1090 # full content in one patch, so delta works everywhere
1093 # full content in one patch, so delta works everywhere
1091 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
1094 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
1092 delta = header + t
1095 delta = header + t
1093 else:
1096 else:
1094 ptext = revlog.rawdata(base, _df=fh)
1097 ptext = revlog.rawdata(base, _df=fh)
1095 delta = mdiff.textdiff(ptext, t)
1098 delta = mdiff.textdiff(ptext, t)
1096
1099
1097 return delta
1100 return delta
1098
1101
1099 def _builddeltainfo(self, revinfo, base, fh):
1102 def _builddeltainfo(self, revinfo, base, fh):
1100 # can we use the cached delta?
1103 # can we use the cached delta?
1101 revlog = self.revlog
1104 revlog = self.revlog
1102 debug_search = self._write_debug is not None and self._debug_search
1105 debug_search = self._write_debug is not None and self._debug_search
1103 chainbase = revlog.chainbase(base)
1106 chainbase = revlog.chainbase(base)
1104 if revlog._generaldelta:
1107 if revlog._generaldelta:
1105 deltabase = base
1108 deltabase = base
1106 else:
1109 else:
1107 deltabase = chainbase
1110 deltabase = chainbase
1108 snapshotdepth = None
1111 snapshotdepth = None
1109 if revlog._sparserevlog and deltabase == nullrev:
1112 if revlog._sparserevlog and deltabase == nullrev:
1110 snapshotdepth = 0
1113 snapshotdepth = 0
1111 elif revlog._sparserevlog and revlog.issnapshot(deltabase):
1114 elif revlog._sparserevlog and revlog.issnapshot(deltabase):
1112 # A delta chain should always be one full snapshot,
1115 # A delta chain should always be one full snapshot,
1113 # zero or more semi-snapshots, and zero or more deltas
1116 # zero or more semi-snapshots, and zero or more deltas
1114 p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
1117 p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
1115 if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
1118 if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
1116 snapshotdepth = len(revlog._deltachain(deltabase)[0])
1119 snapshotdepth = len(revlog._deltachain(deltabase)[0])
1117 delta = None
1120 delta = None
1118 if revinfo.cachedelta:
1121 if revinfo.cachedelta:
1119 cachebase = revinfo.cachedelta[0]
1122 cachebase = revinfo.cachedelta[0]
1120 # check if the diff still apply
1123 # check if the diff still apply
1121 currentbase = cachebase
1124 currentbase = cachebase
1122 while (
1125 while (
1123 currentbase != nullrev
1126 currentbase != nullrev
1124 and currentbase != base
1127 and currentbase != base
1125 and self.revlog.length(currentbase) == 0
1128 and self.revlog.length(currentbase) == 0
1126 ):
1129 ):
1127 currentbase = self.revlog.deltaparent(currentbase)
1130 currentbase = self.revlog.deltaparent(currentbase)
1128 if self.revlog._lazydelta and currentbase == base:
1131 if self.revlog._lazydelta and currentbase == base:
1129 delta = revinfo.cachedelta[1]
1132 delta = revinfo.cachedelta[1]
1130 if delta is None:
1133 if delta is None:
1131 delta = self._builddeltadiff(base, revinfo, fh)
1134 delta = self._builddeltadiff(base, revinfo, fh)
1132 if debug_search:
1135 if debug_search:
1133 msg = b"DBG-DELTAS-SEARCH: uncompressed-delta-size=%d\n"
1136 msg = b"DBG-DELTAS-SEARCH: uncompressed-delta-size=%d\n"
1134 msg %= len(delta)
1137 msg %= len(delta)
1135 self._write_debug(msg)
1138 self._write_debug(msg)
1136 # snapshotdept need to be neither None nor 0 level snapshot
1139 # snapshotdept need to be neither None nor 0 level snapshot
1137 if revlog.upperboundcomp is not None and snapshotdepth:
1140 if revlog.upperboundcomp is not None and snapshotdepth:
1138 lowestrealisticdeltalen = len(delta) // revlog.upperboundcomp
1141 lowestrealisticdeltalen = len(delta) // revlog.upperboundcomp
1139 snapshotlimit = revinfo.textlen >> snapshotdepth
1142 snapshotlimit = revinfo.textlen >> snapshotdepth
1140 if debug_search:
1143 if debug_search:
1141 msg = b"DBG-DELTAS-SEARCH: projected-lower-size=%d\n"
1144 msg = b"DBG-DELTAS-SEARCH: projected-lower-size=%d\n"
1142 msg %= lowestrealisticdeltalen
1145 msg %= lowestrealisticdeltalen
1143 self._write_debug(msg)
1146 self._write_debug(msg)
1144 if snapshotlimit < lowestrealisticdeltalen:
1147 if snapshotlimit < lowestrealisticdeltalen:
1145 if debug_search:
1148 if debug_search:
1146 msg = b"DBG-DELTAS-SEARCH: DISCARDED (snapshot limit)\n"
1149 msg = b"DBG-DELTAS-SEARCH: DISCARDED (snapshot limit)\n"
1147 self._write_debug(msg)
1150 self._write_debug(msg)
1148 return None
1151 return None
1149 if revlog.length(base) < lowestrealisticdeltalen:
1152 if revlog.length(base) < lowestrealisticdeltalen:
1150 if debug_search:
1153 if debug_search:
1151 msg = b"DBG-DELTAS-SEARCH: DISCARDED (prev size)\n"
1154 msg = b"DBG-DELTAS-SEARCH: DISCARDED (prev size)\n"
1152 self._write_debug(msg)
1155 self._write_debug(msg)
1153 return None
1156 return None
1154 header, data = revlog.compress(delta)
1157 header, data = revlog.compress(delta)
1155 deltalen = len(header) + len(data)
1158 deltalen = len(header) + len(data)
1156 offset = revlog.end(len(revlog) - 1)
1159 offset = revlog.end(len(revlog) - 1)
1157 dist = deltalen + offset - revlog.start(chainbase)
1160 dist = deltalen + offset - revlog.start(chainbase)
1158 chainlen, compresseddeltalen = revlog._chaininfo(base)
1161 chainlen, compresseddeltalen = revlog._chaininfo(base)
1159 chainlen += 1
1162 chainlen += 1
1160 compresseddeltalen += deltalen
1163 compresseddeltalen += deltalen
1161
1164
1162 return _deltainfo(
1165 return _deltainfo(
1163 dist,
1166 dist,
1164 deltalen,
1167 deltalen,
1165 (header, data),
1168 (header, data),
1166 deltabase,
1169 deltabase,
1167 chainbase,
1170 chainbase,
1168 chainlen,
1171 chainlen,
1169 compresseddeltalen,
1172 compresseddeltalen,
1170 snapshotdepth,
1173 snapshotdepth,
1171 )
1174 )
1172
1175
1173 def _fullsnapshotinfo(self, fh, revinfo, curr):
1176 def _fullsnapshotinfo(self, fh, revinfo, curr):
1174 rawtext = self.buildtext(revinfo, fh)
1177 rawtext = self.buildtext(revinfo, fh)
1175 data = self.revlog.compress(rawtext)
1178 data = self.revlog.compress(rawtext)
1176 compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
1179 compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
1177 deltabase = chainbase = curr
1180 deltabase = chainbase = curr
1178 snapshotdepth = 0
1181 snapshotdepth = 0
1179 chainlen = 1
1182 chainlen = 1
1180
1183
1181 return _deltainfo(
1184 return _deltainfo(
1182 dist,
1185 dist,
1183 deltalen,
1186 deltalen,
1184 data,
1187 data,
1185 deltabase,
1188 deltabase,
1186 chainbase,
1189 chainbase,
1187 chainlen,
1190 chainlen,
1188 compresseddeltalen,
1191 compresseddeltalen,
1189 snapshotdepth,
1192 snapshotdepth,
1190 )
1193 )
1191
1194
1192 def finddeltainfo(self, revinfo, fh, excluded_bases=None, target_rev=None):
1195 def finddeltainfo(self, revinfo, fh, excluded_bases=None, target_rev=None):
1193 """Find an acceptable delta against a candidate revision
1196 """Find an acceptable delta against a candidate revision
1194
1197
1195 revinfo: information about the revision (instance of _revisioninfo)
1198 revinfo: information about the revision (instance of _revisioninfo)
1196 fh: file handle to either the .i or the .d revlog file,
1199 fh: file handle to either the .i or the .d revlog file,
1197 depending on whether it is inlined or not
1200 depending on whether it is inlined or not
1198
1201
1199 Returns the first acceptable candidate revision, as ordered by
1202 Returns the first acceptable candidate revision, as ordered by
1200 _candidategroups
1203 _candidategroups
1201
1204
1202 If no suitable deltabase is found, we return delta info for a full
1205 If no suitable deltabase is found, we return delta info for a full
1203 snapshot.
1206 snapshot.
1204
1207
1205 `excluded_bases` is an optional set of revision that cannot be used as
1208 `excluded_bases` is an optional set of revision that cannot be used as
1206 a delta base. Use this to recompute delta suitable in censor or strip
1209 a delta base. Use this to recompute delta suitable in censor or strip
1207 context.
1210 context.
1208 """
1211 """
1209 if target_rev is None:
1212 if target_rev is None:
1210 target_rev = len(self.revlog)
1213 target_rev = len(self.revlog)
1211
1214
1212 if not revinfo.textlen:
1215 if not revinfo.textlen:
1213 return self._fullsnapshotinfo(fh, revinfo, target_rev)
1216 return self._fullsnapshotinfo(fh, revinfo, target_rev)
1214
1217
1215 if excluded_bases is None:
1218 if excluded_bases is None:
1216 excluded_bases = set()
1219 excluded_bases = set()
1217
1220
1218 # no delta for flag processor revision (see "candelta" for why)
1221 # no delta for flag processor revision (see "candelta" for why)
1219 # not calling candelta since only one revision needs test, also to
1222 # not calling candelta since only one revision needs test, also to
1220 # avoid overhead fetching flags again.
1223 # avoid overhead fetching flags again.
1221 if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
1224 if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
1222 return self._fullsnapshotinfo(fh, revinfo, target_rev)
1225 return self._fullsnapshotinfo(fh, revinfo, target_rev)
1223
1226
1224 gather_debug = (
1227 gather_debug = (
1225 self._write_debug is not None or self._debug_info is not None
1228 self._write_debug is not None or self._debug_info is not None
1226 )
1229 )
1227 debug_search = self._write_debug is not None and self._debug_search
1230 debug_search = self._write_debug is not None and self._debug_search
1228
1231
1229 if gather_debug:
1232 if gather_debug:
1230 start = util.timer()
1233 start = util.timer()
1231
1234
1232 # count the number of different delta we tried (for debug purpose)
1235 # count the number of different delta we tried (for debug purpose)
1233 dbg_try_count = 0
1236 dbg_try_count = 0
1234 # count the number of "search round" we did. (for debug purpose)
1237 # count the number of "search round" we did. (for debug purpose)
1235 dbg_try_rounds = 0
1238 dbg_try_rounds = 0
1236 dbg_type = b'unknown'
1239 dbg_type = b'unknown'
1237
1240
1238 cachedelta = revinfo.cachedelta
1241 cachedelta = revinfo.cachedelta
1239 p1 = revinfo.p1
1242 p1 = revinfo.p1
1240 p2 = revinfo.p2
1243 p2 = revinfo.p2
1241 revlog = self.revlog
1244 revlog = self.revlog
1242
1245
1243 deltainfo = None
1246 deltainfo = None
1244 p1r, p2r = revlog.rev(p1), revlog.rev(p2)
1247 p1r, p2r = revlog.rev(p1), revlog.rev(p2)
1245
1248
1246 if gather_debug:
1249 if gather_debug:
1247 if p1r != nullrev:
1250 if p1r != nullrev:
1248 p1_chain_len = revlog._chaininfo(p1r)[0]
1251 p1_chain_len = revlog._chaininfo(p1r)[0]
1249 else:
1252 else:
1250 p1_chain_len = -1
1253 p1_chain_len = -1
1251 if p2r != nullrev:
1254 if p2r != nullrev:
1252 p2_chain_len = revlog._chaininfo(p2r)[0]
1255 p2_chain_len = revlog._chaininfo(p2r)[0]
1253 else:
1256 else:
1254 p2_chain_len = -1
1257 p2_chain_len = -1
1255 if debug_search:
1258 if debug_search:
1256 msg = b"DBG-DELTAS-SEARCH: SEARCH rev=%d\n"
1259 msg = b"DBG-DELTAS-SEARCH: SEARCH rev=%d\n"
1257 msg %= target_rev
1260 msg %= target_rev
1258 self._write_debug(msg)
1261 self._write_debug(msg)
1259
1262
1260 groups = _candidategroups(
1263 groups = _candidategroups(
1261 self.revlog,
1264 self.revlog,
1262 revinfo.textlen,
1265 revinfo.textlen,
1263 p1r,
1266 p1r,
1264 p2r,
1267 p2r,
1265 cachedelta,
1268 cachedelta,
1266 excluded_bases,
1269 excluded_bases,
1267 target_rev,
1270 target_rev,
1271 snapshot_cache=self._snapshot_cache,
1268 )
1272 )
1269 candidaterevs = next(groups)
1273 candidaterevs = next(groups)
1270 while candidaterevs is not None:
1274 while candidaterevs is not None:
1271 dbg_try_rounds += 1
1275 dbg_try_rounds += 1
1272 if debug_search:
1276 if debug_search:
1273 prev = None
1277 prev = None
1274 if deltainfo is not None:
1278 if deltainfo is not None:
1275 prev = deltainfo.base
1279 prev = deltainfo.base
1276
1280
1277 if (
1281 if (
1278 cachedelta is not None
1282 cachedelta is not None
1279 and len(candidaterevs) == 1
1283 and len(candidaterevs) == 1
1280 and cachedelta[0] in candidaterevs
1284 and cachedelta[0] in candidaterevs
1281 ):
1285 ):
1282 round_type = b"cached-delta"
1286 round_type = b"cached-delta"
1283 elif p1 in candidaterevs or p2 in candidaterevs:
1287 elif p1 in candidaterevs or p2 in candidaterevs:
1284 round_type = b"parents"
1288 round_type = b"parents"
1285 elif prev is not None and all(c < prev for c in candidaterevs):
1289 elif prev is not None and all(c < prev for c in candidaterevs):
1286 round_type = b"refine-down"
1290 round_type = b"refine-down"
1287 elif prev is not None and all(c > prev for c in candidaterevs):
1291 elif prev is not None and all(c > prev for c in candidaterevs):
1288 round_type = b"refine-up"
1292 round_type = b"refine-up"
1289 else:
1293 else:
1290 round_type = b"search-down"
1294 round_type = b"search-down"
1291 msg = b"DBG-DELTAS-SEARCH: ROUND #%d - %d candidates - %s\n"
1295 msg = b"DBG-DELTAS-SEARCH: ROUND #%d - %d candidates - %s\n"
1292 msg %= (dbg_try_rounds, len(candidaterevs), round_type)
1296 msg %= (dbg_try_rounds, len(candidaterevs), round_type)
1293 self._write_debug(msg)
1297 self._write_debug(msg)
1294 nominateddeltas = []
1298 nominateddeltas = []
1295 if deltainfo is not None:
1299 if deltainfo is not None:
1296 if debug_search:
1300 if debug_search:
1297 msg = (
1301 msg = (
1298 b"DBG-DELTAS-SEARCH: CONTENDER: rev=%d - length=%d\n"
1302 b"DBG-DELTAS-SEARCH: CONTENDER: rev=%d - length=%d\n"
1299 )
1303 )
1300 msg %= (deltainfo.base, deltainfo.deltalen)
1304 msg %= (deltainfo.base, deltainfo.deltalen)
1301 self._write_debug(msg)
1305 self._write_debug(msg)
1302 # if we already found a good delta,
1306 # if we already found a good delta,
1303 # challenge it against refined candidates
1307 # challenge it against refined candidates
1304 nominateddeltas.append(deltainfo)
1308 nominateddeltas.append(deltainfo)
1305 for candidaterev in candidaterevs:
1309 for candidaterev in candidaterevs:
1306 if debug_search:
1310 if debug_search:
1307 msg = b"DBG-DELTAS-SEARCH: CANDIDATE: rev=%d\n"
1311 msg = b"DBG-DELTAS-SEARCH: CANDIDATE: rev=%d\n"
1308 msg %= candidaterev
1312 msg %= candidaterev
1309 self._write_debug(msg)
1313 self._write_debug(msg)
1310 candidate_type = None
1314 candidate_type = None
1311 if candidaterev == p1:
1315 if candidaterev == p1:
1312 candidate_type = b"p1"
1316 candidate_type = b"p1"
1313 elif candidaterev == p2:
1317 elif candidaterev == p2:
1314 candidate_type = b"p2"
1318 candidate_type = b"p2"
1315 elif self.revlog.issnapshot(candidaterev):
1319 elif self.revlog.issnapshot(candidaterev):
1316 candidate_type = b"snapshot-%d"
1320 candidate_type = b"snapshot-%d"
1317 candidate_type %= self.revlog.snapshotdepth(
1321 candidate_type %= self.revlog.snapshotdepth(
1318 candidaterev
1322 candidaterev
1319 )
1323 )
1320
1324
1321 if candidate_type is not None:
1325 if candidate_type is not None:
1322 msg = b"DBG-DELTAS-SEARCH: type=%s\n"
1326 msg = b"DBG-DELTAS-SEARCH: type=%s\n"
1323 msg %= candidate_type
1327 msg %= candidate_type
1324 self._write_debug(msg)
1328 self._write_debug(msg)
1325 msg = b"DBG-DELTAS-SEARCH: size=%d\n"
1329 msg = b"DBG-DELTAS-SEARCH: size=%d\n"
1326 msg %= self.revlog.length(candidaterev)
1330 msg %= self.revlog.length(candidaterev)
1327 self._write_debug(msg)
1331 self._write_debug(msg)
1328 msg = b"DBG-DELTAS-SEARCH: base=%d\n"
1332 msg = b"DBG-DELTAS-SEARCH: base=%d\n"
1329 msg %= self.revlog.deltaparent(candidaterev)
1333 msg %= self.revlog.deltaparent(candidaterev)
1330 self._write_debug(msg)
1334 self._write_debug(msg)
1331
1335
1332 dbg_try_count += 1
1336 dbg_try_count += 1
1333
1337
1334 if debug_search:
1338 if debug_search:
1335 delta_start = util.timer()
1339 delta_start = util.timer()
1336 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
1340 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
1337 if debug_search:
1341 if debug_search:
1338 delta_end = util.timer()
1342 delta_end = util.timer()
1339 msg = b"DBG-DELTAS-SEARCH: delta-search-time=%f\n"
1343 msg = b"DBG-DELTAS-SEARCH: delta-search-time=%f\n"
1340 msg %= delta_end - delta_start
1344 msg %= delta_end - delta_start
1341 self._write_debug(msg)
1345 self._write_debug(msg)
1342 if candidatedelta is not None:
1346 if candidatedelta is not None:
1343 if is_good_delta_info(self.revlog, candidatedelta, revinfo):
1347 if is_good_delta_info(self.revlog, candidatedelta, revinfo):
1344 if debug_search:
1348 if debug_search:
1345 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (GOOD)\n"
1349 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (GOOD)\n"
1346 msg %= candidatedelta.deltalen
1350 msg %= candidatedelta.deltalen
1347 self._write_debug(msg)
1351 self._write_debug(msg)
1348 nominateddeltas.append(candidatedelta)
1352 nominateddeltas.append(candidatedelta)
1349 elif debug_search:
1353 elif debug_search:
1350 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (BAD)\n"
1354 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (BAD)\n"
1351 msg %= candidatedelta.deltalen
1355 msg %= candidatedelta.deltalen
1352 self._write_debug(msg)
1356 self._write_debug(msg)
1353 elif debug_search:
1357 elif debug_search:
1354 msg = b"DBG-DELTAS-SEARCH: NO-DELTA\n"
1358 msg = b"DBG-DELTAS-SEARCH: NO-DELTA\n"
1355 self._write_debug(msg)
1359 self._write_debug(msg)
1356 if nominateddeltas:
1360 if nominateddeltas:
1357 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
1361 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
1358 if deltainfo is not None:
1362 if deltainfo is not None:
1359 candidaterevs = groups.send(deltainfo.base)
1363 candidaterevs = groups.send(deltainfo.base)
1360 else:
1364 else:
1361 candidaterevs = next(groups)
1365 candidaterevs = next(groups)
1362
1366
1363 if deltainfo is None:
1367 if deltainfo is None:
1364 dbg_type = b"full"
1368 dbg_type = b"full"
1365 deltainfo = self._fullsnapshotinfo(fh, revinfo, target_rev)
1369 deltainfo = self._fullsnapshotinfo(fh, revinfo, target_rev)
1366 elif deltainfo.snapshotdepth: # pytype: disable=attribute-error
1370 elif deltainfo.snapshotdepth: # pytype: disable=attribute-error
1367 dbg_type = b"snapshot"
1371 dbg_type = b"snapshot"
1368 else:
1372 else:
1369 dbg_type = b"delta"
1373 dbg_type = b"delta"
1370
1374
1371 if gather_debug:
1375 if gather_debug:
1372 end = util.timer()
1376 end = util.timer()
1373 used_cached = (
1377 used_cached = (
1374 cachedelta is not None
1378 cachedelta is not None
1375 and dbg_try_rounds == 1
1379 and dbg_try_rounds == 1
1376 and dbg_try_count == 1
1380 and dbg_try_count == 1
1377 and deltainfo.base == cachedelta[0]
1381 and deltainfo.base == cachedelta[0]
1378 )
1382 )
1379 dbg = {
1383 dbg = {
1380 'duration': end - start,
1384 'duration': end - start,
1381 'revision': target_rev,
1385 'revision': target_rev,
1382 'delta-base': deltainfo.base, # pytype: disable=attribute-error
1386 'delta-base': deltainfo.base, # pytype: disable=attribute-error
1383 'search_round_count': dbg_try_rounds,
1387 'search_round_count': dbg_try_rounds,
1384 'using-cached-base': used_cached,
1388 'using-cached-base': used_cached,
1385 'delta_try_count': dbg_try_count,
1389 'delta_try_count': dbg_try_count,
1386 'type': dbg_type,
1390 'type': dbg_type,
1387 'p1-chain-len': p1_chain_len,
1391 'p1-chain-len': p1_chain_len,
1388 'p2-chain-len': p2_chain_len,
1392 'p2-chain-len': p2_chain_len,
1389 }
1393 }
1390 if (
1394 if (
1391 deltainfo.snapshotdepth # pytype: disable=attribute-error
1395 deltainfo.snapshotdepth # pytype: disable=attribute-error
1392 is not None
1396 is not None
1393 ):
1397 ):
1394 dbg[
1398 dbg[
1395 'snapshot-depth'
1399 'snapshot-depth'
1396 ] = deltainfo.snapshotdepth # pytype: disable=attribute-error
1400 ] = deltainfo.snapshotdepth # pytype: disable=attribute-error
1397 else:
1401 else:
1398 dbg['snapshot-depth'] = 0
1402 dbg['snapshot-depth'] = 0
1399 target_revlog = b"UNKNOWN"
1403 target_revlog = b"UNKNOWN"
1400 target_type = self.revlog.target[0]
1404 target_type = self.revlog.target[0]
1401 target_key = self.revlog.target[1]
1405 target_key = self.revlog.target[1]
1402 if target_type == KIND_CHANGELOG:
1406 if target_type == KIND_CHANGELOG:
1403 target_revlog = b'CHANGELOG:'
1407 target_revlog = b'CHANGELOG:'
1404 elif target_type == KIND_MANIFESTLOG:
1408 elif target_type == KIND_MANIFESTLOG:
1405 target_revlog = b'MANIFESTLOG:'
1409 target_revlog = b'MANIFESTLOG:'
1406 if target_key:
1410 if target_key:
1407 target_revlog += b'%s:' % target_key
1411 target_revlog += b'%s:' % target_key
1408 elif target_type == KIND_FILELOG:
1412 elif target_type == KIND_FILELOG:
1409 target_revlog = b'FILELOG:'
1413 target_revlog = b'FILELOG:'
1410 if target_key:
1414 if target_key:
1411 target_revlog += b'%s:' % target_key
1415 target_revlog += b'%s:' % target_key
1412 dbg['target-revlog'] = target_revlog
1416 dbg['target-revlog'] = target_revlog
1413
1417
1414 if self._debug_info is not None:
1418 if self._debug_info is not None:
1415 self._debug_info.append(dbg)
1419 self._debug_info.append(dbg)
1416
1420
1417 if self._write_debug is not None:
1421 if self._write_debug is not None:
1418 msg = (
1422 msg = (
1419 b"DBG-DELTAS:"
1423 b"DBG-DELTAS:"
1420 b" %-12s"
1424 b" %-12s"
1421 b" rev=%d:"
1425 b" rev=%d:"
1422 b" delta-base=%d"
1426 b" delta-base=%d"
1423 b" is-cached=%d"
1427 b" is-cached=%d"
1424 b" - search-rounds=%d"
1428 b" - search-rounds=%d"
1425 b" try-count=%d"
1429 b" try-count=%d"
1426 b" - delta-type=%-6s"
1430 b" - delta-type=%-6s"
1427 b" snap-depth=%d"
1431 b" snap-depth=%d"
1428 b" - p1-chain-length=%d"
1432 b" - p1-chain-length=%d"
1429 b" p2-chain-length=%d"
1433 b" p2-chain-length=%d"
1430 b" - duration=%f"
1434 b" - duration=%f"
1431 b"\n"
1435 b"\n"
1432 )
1436 )
1433 msg %= (
1437 msg %= (
1434 dbg["target-revlog"],
1438 dbg["target-revlog"],
1435 dbg["revision"],
1439 dbg["revision"],
1436 dbg["delta-base"],
1440 dbg["delta-base"],
1437 dbg["using-cached-base"],
1441 dbg["using-cached-base"],
1438 dbg["search_round_count"],
1442 dbg["search_round_count"],
1439 dbg["delta_try_count"],
1443 dbg["delta_try_count"],
1440 dbg["type"],
1444 dbg["type"],
1441 dbg["snapshot-depth"],
1445 dbg["snapshot-depth"],
1442 dbg["p1-chain-len"],
1446 dbg["p1-chain-len"],
1443 dbg["p2-chain-len"],
1447 dbg["p2-chain-len"],
1444 dbg["duration"],
1448 dbg["duration"],
1445 )
1449 )
1446 self._write_debug(msg)
1450 self._write_debug(msg)
1447 return deltainfo
1451 return deltainfo
1448
1452
1449
1453
def delta_compression(default_compression_header, deltainfo):
    """return (COMPRESSION_MODE, deltainfo)

    used by revlog v2+ format to dispatch between PLAIN and DEFAULT
    compression.
    """
    h, d = deltainfo.data
    compression_mode = COMP_MODE_INLINE
    if not h and not d:
        # not data to store at all... declare them uncompressed
        compression_mode = COMP_MODE_PLAIN
    elif not h:
        # no explicit header: inspect the first byte of the payload
        t = d[0:1]
        if t == b'\0':
            compression_mode = COMP_MODE_PLAIN
        elif t == default_compression_header:
            compression_mode = COMP_MODE_DEFAULT
    elif h == b'u':
        # we have a more efficient way to declare uncompressed
        h = b''
        compression_mode = COMP_MODE_PLAIN
        deltainfo = drop_u_compression(deltainfo)
    return compression_mode, deltainfo
General Comments 0
You need to be logged in to leave comments. Login now