##// END OF EJS Templates
revlog: add a way to blacklist some revisions when searching for a delta...
marmoute -
r48193:1efe3cde default
parent child Browse files
Show More
@@ -1,1106 +1,1115
1 # revlogdeltas.py - Logic around delta computation for revlog
1 # revlogdeltas.py - Logic around delta computation for revlog
2 #
2 #
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 # Copyright 2018 Octobus <contact@octobus.net>
4 # Copyright 2018 Octobus <contact@octobus.net>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8 """Helper class to compute deltas stored inside revlogs"""
8 """Helper class to compute deltas stored inside revlogs"""
9
9
10 from __future__ import absolute_import
10 from __future__ import absolute_import
11
11
12 import collections
12 import collections
13 import struct
13 import struct
14
14
15 # import stuff from node for others to import from revlog
15 # import stuff from node for others to import from revlog
16 from ..node import nullrev
16 from ..node import nullrev
17 from ..i18n import _
17 from ..i18n import _
18 from ..pycompat import getattr
18 from ..pycompat import getattr
19
19
20 from .constants import (
20 from .constants import (
21 REVIDX_ISCENSORED,
21 REVIDX_ISCENSORED,
22 REVIDX_RAWTEXT_CHANGING_FLAGS,
22 REVIDX_RAWTEXT_CHANGING_FLAGS,
23 )
23 )
24
24
25 from ..thirdparty import attr
25 from ..thirdparty import attr
26
26
27 from .. import (
27 from .. import (
28 error,
28 error,
29 mdiff,
29 mdiff,
30 util,
30 util,
31 )
31 )
32
32
33 from . import flagutil
33 from . import flagutil
34
34
35 # maximum <delta-chain-data>/<revision-text-length> ratio
35 # maximum <delta-chain-data>/<revision-text-length> ratio
36 LIMIT_DELTA2TEXT = 2
36 LIMIT_DELTA2TEXT = 2
37
37
38
38
class _testrevlog(object):
    """A minimal in-memory stand-in for a revlog, used by the doctests."""

    def __init__(self, data, density=0.5, mingap=0, snapshot=()):
        """``data`` is a list of cumulative revision payload boundaries"""
        self._data = data
        self._srdensitythreshold = density
        self._srmingapsize = mingap
        self._snapshot = set(snapshot)
        self.index = None

    def start(self, rev):
        # both the null revision and revision 0 start at offset zero
        if rev in (nullrev, 0):
            return 0
        return self._data[rev - 1]

    def end(self, rev):
        return 0 if rev == nullrev else self._data[rev]

    def length(self, rev):
        return self.end(rev) - self.start(rev)

    def __len__(self):
        return len(self._data)

    def issnapshot(self, rev):
        # the null revision is always considered a snapshot
        return rev == nullrev or rev in self._snapshot
72
72
73
73
def slicechunk(revlog, revs, targetsize=None):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `revlog._srdensitythreshold`. No gap smaller than
    `revlog._srmingapsize` is skipped.

    If `targetsize` is set, no chunk larger than `targetsize` will be yielded.
    For consistency with other slicing choices, this limit won't go lower than
    `revlog._srmingapsize`.

    If an individual revision is larger than this limit, it will still
    be raised individually.

    >>> data = [
    ...     5,  #00 (5)
    ...     10, #01 (5)
    ...     12, #02 (2)
    ...     12, #03 (empty)
    ...     27, #04 (15)
    ...     31, #05 (4)
    ...     31, #06 (empty)
    ...     42, #07 (11)
    ...     47, #08 (5)
    ...     47, #09 (empty)
    ...     48, #10 (1)
    ...     51, #11 (3)
    ...     74, #12 (23)
    ...     85, #13 (11)
    ...     86, #14 (1)
    ...     91, #15 (5)
    ... ]
    >>> revlog = _testrevlog(data, snapshot=range(16))

    >>> list(slicechunk(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(slicechunk(revlog, [0, 15]))
    [[0], [15]]
    >>> list(slicechunk(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]

    Slicing with a maximum chunk size
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
    [[0], [11], [13], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
    [[0], [11], [13, 15]]

    Slicing involving nullrev
    >>> list(slicechunk(revlog, [-1, 0, 11, 13, 15], targetsize=20))
    [[-1, 0], [11], [13, 15]]
    >>> list(slicechunk(revlog, [-1, 13, 15], targetsize=5))
    [[-1], [13], [15]]
    """
    if targetsize is not None:
        targetsize = max(revlog._srmingapsize, targetsize)
    # targetsize should not be specified when evaluating delta candidates:
    # * targetsize is used to ensure we stay within specification when reading,
    densityslicing = getattr(revlog.index, 'slicechunktodensity', None)
    if densityslicing is None:
        # no native implementation available: fall back to the pure-python one
        def densityslicing(x, y, z):
            return _slicechunktodensity(revlog, x, y, z)

    threshold = revlog._srdensitythreshold
    mingap = revlog._srmingapsize
    for chunk in densityslicing(revs, threshold, mingap):
        for subchunk in _slicechunktosize(revlog, chunk, targetsize):
            yield subchunk
146
146
147
147
def _slicechunktosize(revlog, revs, targetsize=None):
    """slice revs to match the target size

    This is intended to be used on chunks that density slicing selected but
    that are still too large compared to the read guarantee of the revlog.
    This might happen when the "minimal gap size" interrupted the slicing or
    when chains are built in a way that creates large blocks next to each
    other.

    >>> data = [
    ...     3,  #0 (3)
    ...     5,  #1 (2)
    ...     6,  #2 (1)
    ...     8,  #3 (2)
    ...     8,  #4 (empty)
    ...     11, #5 (3)
    ...     12, #6 (1)
    ...     13, #7 (1)
    ...     14, #8 (1)
    ... ]

    == All snapshots cases ==
    >>> revlog = _testrevlog(data, snapshot=range(9))

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1, 2], [3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]

    == No Snapshot cases ==
    >>> revlog = _testrevlog(data)

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1], [2, 3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [4, 5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]

    == mixed case ==
    >>> revlog = _testrevlog(data, snapshot=[0, 1, 2])
    >>> list(_slicechunktosize(revlog, list(range(9)), 5))
    [[0, 1], [2], [3, 4, 5], [6, 7, 8]]
    """
    assert targetsize is None or 0 <= targetsize
    startdata = revlog.start(revs[0])
    enddata = revlog.end(revs[-1])
    fullspan = enddata - startdata
    # the whole chunk already fits within the read guarantee: nothing to do
    if targetsize is None or fullspan <= targetsize:
        yield revs
        return

    startrevidx = 0
    endrevidx = 1
    iterrevs = enumerate(revs)
    next(iterrevs)  # skip first rev.
    # first step: get snapshots out of the way
    for idx, r in iterrevs:
        span = revlog.end(r) - startdata
        snapshot = revlog.issnapshot(r)
        if span <= targetsize and snapshot:
            # still within budget and still a snapshot: extend current chunk
            endrevidx = idx + 1
        else:
            # flush the current chunk and restart one at this revision
            chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
            if chunk:
                yield chunk
            startrevidx = idx
            startdata = revlog.start(r)
            endrevidx = idx + 1
        if not snapshot:
            # first non-snapshot revision: switch to binary slicing below
            break

    # for the others, we use binary slicing to quickly converge toward valid
    # chunks (otherwise, we might end up looking for start/end of many
    # revisions). This logic is not looking for the perfect slicing point, it
    # focuses on quickly converging toward valid chunks.
    nbitem = len(revs)
    while (enddata - startdata) > targetsize:
        endrevidx = nbitem
        if nbitem - startrevidx <= 1:
            break  # protect against individual chunk larger than limit
        localenddata = revlog.end(revs[endrevidx - 1])
        span = localenddata - startdata
        while span > targetsize:
            if endrevidx - startrevidx <= 1:
                break  # protect against individual chunk larger than limit
            # halve the candidate range until the span fits the budget
            endrevidx -= (endrevidx - startrevidx) // 2
            localenddata = revlog.end(revs[endrevidx - 1])
            span = localenddata - startdata
        chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
        if chunk:
            yield chunk
        startrevidx = endrevidx
        startdata = revlog.start(revs[startrevidx])

    # yield whatever remains after the last cut
    chunk = _trimchunk(revlog, revs, startrevidx)
    if chunk:
        yield chunk
301
301
302
302
def _slicechunktodensity(revlog, revs, targetdensity=0.5, mingapsize=0):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
    skipped.

    >>> revlog = _testrevlog([
    ...     5,  #00 (5)
    ...     10, #01 (5)
    ...     12, #02 (2)
    ...     12, #03 (empty)
    ...     27, #04 (15)
    ...     31, #05 (4)
    ...     31, #06 (empty)
    ...     42, #07 (11)
    ...     47, #08 (5)
    ...     47, #09 (empty)
    ...     48, #10 (1)
    ...     51, #11 (3)
    ...     74, #12 (23)
    ...     85, #13 (11)
    ...     86, #14 (1)
    ...     91, #15 (5)
    ... ])

    >>> list(_slicechunktodensity(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(_slicechunktodensity(revlog, [0, 15]))
    [[0], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           mingapsize=20))
    [[1, 2, 3, 5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95))
    [[1, 2], [5], [8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95, mingapsize=12))
    [[1, 2], [5, 8, 10, 11], [14]]
    """
    start = revlog.start
    length = revlog.length

    # a single revision (or nothing) cannot be split any further
    if len(revs) <= 1:
        yield revs
        return

    deltachainspan = segmentspan(revlog, revs)

    # span too small to contain a gap worth skipping
    if deltachainspan < mingapsize:
        yield revs
        return

    readdata = deltachainspan
    chainpayload = sum(length(r) for r in revs)

    if deltachainspan:
        density = chainpayload / float(deltachainspan)
    else:
        density = 1.0

    # density already acceptable: read everything in one go
    if density >= targetdensity:
        yield revs
        return

    # Collect (size, index) of the gaps between consecutive revisions
    gaps = []
    prevend = None
    for i, rev in enumerate(revs):
        revstart = start(rev)
        revlen = length(rev)

        # Skip empty revisions to form larger holes
        if revlen == 0:
            continue

        if prevend is not None:
            gapsize = revstart - prevend
            # only consider holes that are large enough
            if gapsize > mingapsize:
                gaps.append((gapsize, i))

        prevend = revstart + revlen
    # sort ascending so that pop() below removes the largest remaining gap
    gaps.sort()

    # Collect the indices of the largest holes until the density is acceptable
    selected = []
    while gaps and density < targetdensity:
        gapsize, gapidx = gaps.pop()

        selected.append(gapidx)

        # skipping this gap shrinks the data actually read from disk,
        # improving the density of what remains
        readdata -= gapsize
        if readdata > 0:
            density = chainpayload / float(readdata)
        else:
            density = 1.0
    selected.sort()

    # Cut the revs at collected indices
    previdx = 0
    for idx in selected:

        chunk = _trimchunk(revlog, revs, previdx, idx)
        if chunk:
            yield chunk

        previdx = idx

    chunk = _trimchunk(revlog, revs, previdx)
    if chunk:
        yield chunk
427
427
428
428
429 def _trimchunk(revlog, revs, startidx, endidx=None):
429 def _trimchunk(revlog, revs, startidx, endidx=None):
430 """returns revs[startidx:endidx] without empty trailing revs
430 """returns revs[startidx:endidx] without empty trailing revs
431
431
432 Doctest Setup
432 Doctest Setup
433 >>> revlog = _testrevlog([
433 >>> revlog = _testrevlog([
434 ... 5, #0
434 ... 5, #0
435 ... 10, #1
435 ... 10, #1
436 ... 12, #2
436 ... 12, #2
437 ... 12, #3 (empty)
437 ... 12, #3 (empty)
438 ... 17, #4
438 ... 17, #4
439 ... 21, #5
439 ... 21, #5
440 ... 21, #6 (empty)
440 ... 21, #6 (empty)
441 ... ])
441 ... ])
442
442
443 Contiguous cases:
443 Contiguous cases:
444 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
444 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
445 [0, 1, 2, 3, 4, 5]
445 [0, 1, 2, 3, 4, 5]
446 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
446 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
447 [0, 1, 2, 3, 4]
447 [0, 1, 2, 3, 4]
448 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
448 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
449 [0, 1, 2]
449 [0, 1, 2]
450 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
450 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
451 [2]
451 [2]
452 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
452 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
453 [3, 4, 5]
453 [3, 4, 5]
454 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
454 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
455 [3, 4]
455 [3, 4]
456
456
457 Discontiguous cases:
457 Discontiguous cases:
458 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
458 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
459 [1, 3, 5]
459 [1, 3, 5]
460 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
460 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
461 [1]
461 [1]
462 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
462 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
463 [3, 5]
463 [3, 5]
464 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
464 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
465 [3, 5]
465 [3, 5]
466 """
466 """
467 length = revlog.length
467 length = revlog.length
468
468
469 if endidx is None:
469 if endidx is None:
470 endidx = len(revs)
470 endidx = len(revs)
471
471
472 # If we have a non-emtpy delta candidate, there are nothing to trim
472 # If we have a non-emtpy delta candidate, there are nothing to trim
473 if revs[endidx - 1] < len(revlog):
473 if revs[endidx - 1] < len(revlog):
474 # Trim empty revs at the end, except the very first revision of a chain
474 # Trim empty revs at the end, except the very first revision of a chain
475 while (
475 while (
476 endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0
476 endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0
477 ):
477 ):
478 endidx -= 1
478 endidx -= 1
479
479
480 return revs[startidx:endidx]
480 return revs[startidx:endidx]
481
481
482
482
def segmentspan(revlog, revs):
    """Get the byte span of a segment of revisions

    revs is a sorted array of revision numbers

    >>> revlog = _testrevlog([
    ...     5,  #0
    ...     10, #1
    ...     12, #2
    ...     12, #3 (empty)
    ...     17, #4
    ... ])

    >>> segmentspan(revlog, [0, 1, 2, 3, 4])
    17
    >>> segmentspan(revlog, [0, 4])
    17
    >>> segmentspan(revlog, [3, 4])
    5
    >>> segmentspan(revlog, [1, 2, 3,])
    7
    >>> segmentspan(revlog, [1, 3])
    7
    """
    # span from the first byte of the first revision to the last byte of the
    # last revision; an empty segment spans nothing
    if revs:
        return revlog.end(revs[-1]) - revlog.start(revs[0])
    return 0
511
511
512
512
def _textfromdelta(fh, revlog, baserev, delta, p1, p2, flags, expectednode):
    """build full text from a (base, delta) pair and other metadata

    Applies ``delta`` to the text of ``baserev`` (read through file handle
    ``fh``), runs the raw flag processors, and verifies the result hashes to
    ``expectednode`` (with parents ``p1``/``p2``) when required.

    Raises ``error.StorageError`` when ``flags`` carries REVIDX_ISCENSORED but
    the computed text turns out not to be censored, and re-raises
    ``error.CensoredNodeError`` unless the censored flag is set.
    """
    # special case deltas which replace entire base; no need to decode
    # base revision. this neatly avoids censored bases, which throw when
    # they're decoded.
    hlen = struct.calcsize(b">lll")
    if delta[:hlen] == mdiff.replacediffheader(
        revlog.rawsize(baserev), len(delta) - hlen
    ):
        # full-replacement delta: the payload after the header IS the text
        fulltext = delta[hlen:]
    else:
        # deltabase is rawtext before changed by flag processors, which is
        # equivalent to non-raw text
        basetext = revlog.revision(baserev, _df=fh, raw=False)
        fulltext = mdiff.patch(basetext, delta)

    try:
        validatehash = flagutil.processflagsraw(revlog, fulltext, flags)
        if validatehash:
            revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
        # reaching this point without a CensoredNodeError means the text is
        # not actually censored, contradicting the flag
        if flags & REVIDX_ISCENSORED:
            raise error.StorageError(
                _(b'node %s is not censored') % expectednode
            )
    except error.CensoredNodeError:
        # must pass the censored index flag to add censored revisions
        if not flags & REVIDX_ISCENSORED:
            raise
    return fulltext
542
542
543
543
@attr.s(slots=True, frozen=True)
class _deltainfo(object):
    """Immutable description of a candidate delta and its chain cost."""

    # distance from the base revision; bounds the amount of I/O needed
    distance = attr.ib()
    # length of the stored (compressed) delta, header included
    deltalen = attr.ib()
    # (header, data) pair as produced by revlog.compress()
    data = attr.ib()
    # revision number the delta applies against
    base = attr.ib()
    # first revision of the delta chain
    chainbase = attr.ib()
    # number of deltas in the chain, this one included
    chainlen = attr.ib()
    # cumulated compressed size of all deltas in the chain
    compresseddeltalen = attr.ib()
    # snapshot depth if this revision is a snapshot, None otherwise
    snapshotdepth = attr.ib()
554
554
555
555
def drop_u_compression(delta):
    """Strip the b'u' (uncompressed) marker from a delta's payload.

    This is useful for revlog formats that have a better compression
    method, where the explicit "uncompressed" header byte is unnecessary.
    Returns a new ``_deltainfo`` with an empty header and a payload one
    byte shorter.
    """
    header, payload = delta.data
    assert header == b'u', header
    return _deltainfo(
        delta.distance,
        delta.deltalen - 1,
        (b'', payload),
        delta.base,
        delta.chainbase,
        delta.chainlen,
        delta.compresseddeltalen,
        delta.snapshotdepth,
    )
572
572
573
573
def isgooddeltainfo(revlog, deltainfo, revinfo):
    """Tell whether ``deltainfo`` is an acceptable delta for ``revinfo``.

    "Good" means the delta stays within the disk-span, size, and
    chain-length bounds that we know to be performant.
    """
    if deltainfo is None:
        return False

    # - 'deltainfo.distance' is the distance from the base revision --
    #   bounding it limits the amount of I/O we need to do.
    # - 'deltainfo.compresseddeltalen' is the sum of the total size of
    #   deltas we need to apply -- bounding it limits the amount of CPU
    #   we consume.

    text_size = revinfo.textlen
    max_span = revlog._maxdeltachainspan
    if not max_span:
        max_span = deltainfo.distance  # ensure the conditional passes
    max_span = max(max_span, text_size * 4)

    # Bad delta from read span:
    #
    # The span of data read is larger than the maximum allowed.
    #
    # In the sparse-revlog case, we rely on the associated "sparse reading"
    # to avoid issues related to the span of data. In theory, one could
    # build a pathological revlog where the delta pattern would lead to too
    # many reads, but that does not happen in practice, so we skip the span
    # check entirely.
    if not revlog._sparserevlog and max_span < deltainfo.distance:
        return False

    # Bad delta from new delta size:
    #
    # Storing a delta larger than the target text itself is inefficient.
    if text_size < deltainfo.deltalen:
        return False

    # Bad delta from cumulated payload size:
    #
    # The whole chain must stay below K * target text length.
    if text_size * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
        return False

    # Bad delta from chain length:
    #
    # Too many deltas in the chain.
    if revlog._maxchainlen and revlog._maxchainlen < deltainfo.chainlen:
        return False

    # Bad delta from intermediate snapshot size limit:
    #
    # The limit exists to prevent endless chains of intermediate deltas
    # from being created.
    snapdepth = deltainfo.snapshotdepth
    if snapdepth is not None and (text_size >> snapdepth) < deltainfo.deltalen:
        return False

    # Bad delta if the new intermediate snapshot is larger than the
    # previous snapshot it builds upon.
    if snapdepth and revlog.length(deltainfo.base) < deltainfo.deltalen:
        return False

    return True
645
645
646
646
# If a revision's full text is that much bigger than a base candidate full
# text's, it is very unlikely that it will produce a valid delta. We no longer
# consider these candidates.
LIMIT_BASE2TEXT = 500
651
651
652
652
def _candidategroups(revlog, textlen, p1, p2, cachedelta):
    """Provides group of revision to be tested as delta base

    This top level function focus on emitting groups with unique and worthwhile
    content. See _raw_candidate_groups for details about the group order.

    This is a coroutine-style generator: each yielded value is a tuple of
    candidate base revisions (or None once exhausted), and the caller uses
    ``send()`` to report which candidate (if any) produced a good delta so
    that the underlying ``_refinedgroups`` generator can refine its search.
    """
    # should we try to build a delta?
    if not (len(revlog) and revlog._storedeltachains):
        yield None
        return

    deltalength = revlog.length
    deltaparent = revlog.deltaparent
    sparse = revlog._sparserevlog
    good = None

    deltas_limit = textlen * LIMIT_DELTA2TEXT

    # revisions already considered (nullrev is handled as a last resort)
    tested = {nullrev}
    candidates = _refinedgroups(revlog, p1, p2, cachedelta)
    while True:
        temptative = candidates.send(good)
        if temptative is None:
            break
        group = []
        for rev in temptative:
            # skip over empty delta (no need to include them in a chain)
            while revlog._generaldelta and not (
                rev == nullrev or rev in tested or deltalength(rev)
            ):
                tested.add(rev)
                rev = deltaparent(rev)
            # no need to try a delta against nullrev, this will be done as a
            # last resort.
            if rev == nullrev:
                continue
            # filter out revision we tested already
            if rev in tested:
                continue
            tested.add(rev)
            # filter out delta base that will never produce good delta
            if deltas_limit < revlog.length(rev):
                continue
            if sparse and revlog.rawsize(rev) < (textlen // LIMIT_BASE2TEXT):
                continue
            # no delta for rawtext-changing revs (see "candelta" for why)
            if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
                continue
            # If we reach here, we are about to build and test a delta.
            # The delta building process will compute the chaininfo in all
            # case, since that computation is cached, it is fine to access it
            # here too.
            chainlen, chainsize = revlog._chaininfo(rev)
            # if chain will be too long, skip base
            if revlog._maxchainlen and chainlen >= revlog._maxchainlen:
                continue
            # if chain already have too much data, skip base
            if deltas_limit < chainsize:
                continue
            if sparse and revlog.upperboundcomp is not None:
                maxcomp = revlog.upperboundcomp
                basenotsnap = (p1, p2, nullrev)
                if rev not in basenotsnap and revlog.issnapshot(rev):
                    snapshotdepth = revlog.snapshotdepth(rev)
                    # If text is significantly larger than the base, we can
                    # expect the resulting delta to be proportional to the size
                    # difference
                    revsize = revlog.rawsize(rev)
                    rawsizedistance = max(textlen - revsize, 0)
                    # use an estimate of the compression upper bound.
                    lowestrealisticdeltalen = rawsizedistance // maxcomp

                    # check the absolute constraint on the delta size
                    snapshotlimit = textlen >> snapshotdepth
                    if snapshotlimit < lowestrealisticdeltalen:
                        # delta lower bound is larger than accepted upper bound
                        continue

                    # check the relative constraint on the delta size
                    revlength = revlog.length(rev)
                    if revlength < lowestrealisticdeltalen:
                        # delta probable lower bound is larger than target base
                        continue

            group.append(rev)
        if group:
            # XXX: in the sparse revlog case, group can become large,
            # impacting performances. Some bounding or slicing mechanism
            # would help to reduce this impact.
            good = yield tuple(group)
    yield None
744
744
745
745
def _findsnapshots(revlog, cache, start_rev):
    """Collect snapshot revisions from ``start_rev`` to tip.

    ``cache`` is a mapping of delta-parent rev -> list of snapshot revs,
    filled in place. Uses the index's native implementation when one is
    available, otherwise falls back to a pure Python walk.
    """
    index = revlog.index
    if util.safehasattr(index, b'findsnapshots'):
        # fast path provided by the index implementation itself
        index.findsnapshots(cache, start_rev)
        return
    is_snapshot = revlog.issnapshot
    delta_parent = revlog.deltaparent
    for rev in revlog.revs(start_rev):
        if is_snapshot(rev):
            cache[delta_parent(rev)].append(rev)
756
756
757
757
def _refinedgroups(revlog, p1, p2, cachedelta):
    """Yield groups of candidate delta bases, refined from caller feedback.

    Coroutine-style generator: each ``yield`` emits a tuple of candidate
    revisions (or None when the search is exhausted) and receives, via
    ``send()``, the revision the caller settled on (or None if none fit).
    That feedback drives the snapshot refinement phase below.
    """
    good = None
    # First we try to reuse the delta contained in the bundle.
    # (or from the source revlog)
    #
    # This logic only applies to general delta repositories and can be disabled
    # through configuration. Disabling reuse source delta is useful when
    # we want to make sure we recomputed "optimal" deltas.
    if cachedelta and revlog._generaldelta and revlog._lazydeltabase:
        # Assume what we received from the server is a good choice
        # build delta will reuse the cache
        good = yield (cachedelta[0],)
        if good is not None:
            yield None
            return
    snapshots = collections.defaultdict(list)
    for candidates in _rawgroups(revlog, p1, p2, cachedelta, snapshots):
        good = yield candidates
        if good is not None:
            break

    # If sparse revlog is enabled, we can try to refine the available deltas
    if not revlog._sparserevlog:
        yield None
        return

    # if we have a refinable value, try to refine it
    if good is not None and good not in (p1, p2) and revlog.issnapshot(good):
        # refine snapshot down: walk toward the snapshot's own delta base
        previous = None
        while previous != good:
            previous = good
            base = revlog.deltaparent(good)
            if base == nullrev:
                break
            good = yield (base,)
        # refine snapshot up: try the snapshot's children as bases
        if not snapshots:
            _findsnapshots(revlog, snapshots, good + 1)
        previous = None
        while good != previous:
            previous = good
            children = tuple(sorted(c for c in snapshots[good]))
            good = yield children

    # we have found nothing
    yield None
805
805
806
806
def _rawgroups(revlog, p1, p2, cachedelta, snapshots=None):
    """Provides group of revision to be tested as delta base

    This lower level function focuses on emitting deltas that are
    theoretically interesting, without looking at any practical details.

    The group order aims at providing fast or small candidates first.
    """
    gdelta = revlog._generaldelta
    # gate sparse behind general-delta because of issue6056
    sparse = gdelta and revlog._sparserevlog
    curr = len(revlog)
    prev = curr - 1
    deltachain = lambda rev: revlog._deltachain(rev)[0]

    if gdelta:
        # exclude already lazy tested base if any
        parents = [p for p in (p1, p2) if p != nullrev]

        if not revlog._deltabothparents and len(parents) == 2:
            parents.sort()
            # To minimize the chance of having to build a fulltext,
            # pick first whichever parent is closest to us (max rev)
            yield (parents[1],)
            # then the other one (min rev) if the first did not fit
            yield (parents[0],)
        elif len(parents) > 0:
            # Test all parents (1 or 2), and keep the best candidate
            yield parents

    if sparse and parents:
        if snapshots is None:
            # map: base-rev: snapshot-rev
            snapshots = collections.defaultdict(list)
        # See if we can use an existing snapshot in the parent chains to use as
        # a base for a new intermediate-snapshot
        #
        # search for snapshot in parents delta chain
        # map: snapshot-level: snapshot-rev
        parents_snaps = collections.defaultdict(set)
        candidate_chains = [deltachain(p) for p in parents]
        for chain in candidate_chains:
            for idx, s in enumerate(chain):
                if not revlog.issnapshot(s):
                    break
                parents_snaps[idx].add(s)
        snapfloor = min(parents_snaps[0]) + 1
        _findsnapshots(revlog, snapshots, snapfloor)
        # search for the highest "unrelated" revision
        #
        # Adding snapshots used by "unrelated" revisions increases the odds
        # we reuse an independent, yet better snapshot chain.
        #
        # XXX instead of building a set of revisions, we could lazily enumerate
        # over the chains. That would be more efficient, however we stick to
        # simple code for now.
        all_revs = set()
        for chain in candidate_chains:
            all_revs.update(chain)
        other = None
        for r in revlog.revs(prev, snapfloor):
            if r not in all_revs:
                other = r
                break
        if other is not None:
            # To avoid unfair competition, we won't use unrelated intermediate
            # snapshots that are deeper than the ones from the parent delta
            # chain.
            max_depth = max(parents_snaps.keys())
            chain = deltachain(other)
            for idx, s in enumerate(chain):
                if s < snapfloor:
                    continue
                if max_depth < idx:
                    break
                if not revlog.issnapshot(s):
                    break
                parents_snaps[idx].add(s)
        # Test them as possible intermediate snapshot base
        # We test them from highest to lowest level. High level ones are more
        # likely to result in small deltas
        floor = None
        for idx, snaps in sorted(parents_snaps.items(), reverse=True):
            siblings = set()
            for s in snaps:
                siblings.update(snapshots[s])
            # Before considering making a new intermediate snapshot, we check
            # if an existing snapshot, children of base we consider, would be
            # suitable.
            #
            # It gives a chance to reuse a delta chain "unrelated" to the
            # current revision instead of starting our own. Without such
            # re-use, topological branches would keep reopening new chains,
            # creating more and more snapshots as the repository grows.

            if floor is not None:
                # We only do this for siblings created after the one in our
                # parent's delta chain. Those created before have fewer chances
                # to be valid bases since our ancestors had to create a new
                # snapshot.
                siblings = [r for r in siblings if floor < r]
            yield tuple(sorted(siblings))
            # then test the base from our parent's delta chain.
            yield tuple(sorted(snaps))
            floor = min(snaps)
        # No suitable base found in the parent chain, search if any full
        # snapshots emitted since parent's base would be a suitable base for an
        # intermediate snapshot.
        #
        # It gives a chance to reuse a delta chain unrelated to the current
        # revisions instead of starting our own. Without such re-use,
        # topological branches would keep reopening new full chains, creating
        # more and more snapshots as the repository grows.
        yield tuple(snapshots[nullrev])

    if not sparse:
        # other approach failed try against prev to hopefully save us a
        # fulltext.
        yield (prev,)
926
926
927
927
928 class deltacomputer(object):
928 class deltacomputer(object):
    def __init__(self, revlog):
        # the revlog this computer builds deltas for
        self.revlog = revlog
931
931
932 def buildtext(self, revinfo, fh):
932 def buildtext(self, revinfo, fh):
933 """Builds a fulltext version of a revision
933 """Builds a fulltext version of a revision
934
934
935 revinfo: revisioninfo instance that contains all needed info
935 revinfo: revisioninfo instance that contains all needed info
936 fh: file handle to either the .i or the .d revlog file,
936 fh: file handle to either the .i or the .d revlog file,
937 depending on whether it is inlined or not
937 depending on whether it is inlined or not
938 """
938 """
939 btext = revinfo.btext
939 btext = revinfo.btext
940 if btext[0] is not None:
940 if btext[0] is not None:
941 return btext[0]
941 return btext[0]
942
942
943 revlog = self.revlog
943 revlog = self.revlog
944 cachedelta = revinfo.cachedelta
944 cachedelta = revinfo.cachedelta
945 baserev = cachedelta[0]
945 baserev = cachedelta[0]
946 delta = cachedelta[1]
946 delta = cachedelta[1]
947
947
948 fulltext = btext[0] = _textfromdelta(
948 fulltext = btext[0] = _textfromdelta(
949 fh,
949 fh,
950 revlog,
950 revlog,
951 baserev,
951 baserev,
952 delta,
952 delta,
953 revinfo.p1,
953 revinfo.p1,
954 revinfo.p2,
954 revinfo.p2,
955 revinfo.flags,
955 revinfo.flags,
956 revinfo.node,
956 revinfo.node,
957 )
957 )
958 return fulltext
958 return fulltext
959
959
960 def _builddeltadiff(self, base, revinfo, fh):
960 def _builddeltadiff(self, base, revinfo, fh):
961 revlog = self.revlog
961 revlog = self.revlog
962 t = self.buildtext(revinfo, fh)
962 t = self.buildtext(revinfo, fh)
963 if revlog.iscensored(base):
963 if revlog.iscensored(base):
964 # deltas based on a censored revision must replace the
964 # deltas based on a censored revision must replace the
965 # full content in one patch, so delta works everywhere
965 # full content in one patch, so delta works everywhere
966 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
966 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
967 delta = header + t
967 delta = header + t
968 else:
968 else:
969 ptext = revlog.rawdata(base, _df=fh)
969 ptext = revlog.rawdata(base, _df=fh)
970 delta = mdiff.textdiff(ptext, t)
970 delta = mdiff.textdiff(ptext, t)
971
971
972 return delta
972 return delta
973
973
    def _builddeltainfo(self, revinfo, base, fh):
        """Build a ``_deltainfo`` describing a delta of ``revinfo`` against ``base``.

        Returns None when the estimated lower bound on the delta size
        already disqualifies ``base`` as an intermediate snapshot base.
        """
        # can we use the cached delta?
        revlog = self.revlog
        chainbase = revlog.chainbase(base)
        if revlog._generaldelta:
            deltabase = base
        else:
            # without general delta, deltas can only apply against the
            # chain base
            deltabase = chainbase
        snapshotdepth = None
        if revlog._sparserevlog and deltabase == nullrev:
            snapshotdepth = 0
        elif revlog._sparserevlog and revlog.issnapshot(deltabase):
            # A delta chain should always be one full snapshot,
            # zero or more semi-snapshots, and zero or more deltas
            p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
            if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
                snapshotdepth = len(revlog._deltachain(deltabase)[0])
        delta = None
        if revinfo.cachedelta:
            cachebase, cachediff = revinfo.cachedelta
            # check if the diff still apply
            currentbase = cachebase
            # walk up over empty deltas: they do not change the text, so
            # the cached diff still applies if we end up on ``base``
            while (
                currentbase != nullrev
                and currentbase != base
                and self.revlog.length(currentbase) == 0
            ):
                currentbase = self.revlog.deltaparent(currentbase)
            if self.revlog._lazydelta and currentbase == base:
                delta = revinfo.cachedelta[1]
        if delta is None:
            delta = self._builddeltadiff(base, revinfo, fh)
        # snapshotdepth needs to be neither None nor a 0-level snapshot
        if revlog.upperboundcomp is not None and snapshotdepth:
            # estimated compression lower bound for the delta
            lowestrealisticdeltalen = len(delta) // revlog.upperboundcomp
            snapshotlimit = revinfo.textlen >> snapshotdepth
            # absolute constraint: too large for this snapshot depth
            if snapshotlimit < lowestrealisticdeltalen:
                return None
            # relative constraint: larger than the base it builds upon
            if revlog.length(base) < lowestrealisticdeltalen:
                return None
        header, data = revlog.compress(delta)
        deltalen = len(header) + len(data)
        offset = revlog.end(len(revlog) - 1)
        dist = deltalen + offset - revlog.start(chainbase)
        chainlen, compresseddeltalen = revlog._chaininfo(base)
        chainlen += 1
        compresseddeltalen += deltalen

        return _deltainfo(
            dist,
            deltalen,
            (header, data),
            deltabase,
            chainbase,
            chainlen,
            compresseddeltalen,
            snapshotdepth,
        )
1032
1032
1033 def _fullsnapshotinfo(self, fh, revinfo):
1033 def _fullsnapshotinfo(self, fh, revinfo):
1034 curr = len(self.revlog)
1034 curr = len(self.revlog)
1035 rawtext = self.buildtext(revinfo, fh)
1035 rawtext = self.buildtext(revinfo, fh)
1036 data = self.revlog.compress(rawtext)
1036 data = self.revlog.compress(rawtext)
1037 compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
1037 compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
1038 deltabase = chainbase = curr
1038 deltabase = chainbase = curr
1039 snapshotdepth = 0
1039 snapshotdepth = 0
1040 chainlen = 1
1040 chainlen = 1
1041
1041
1042 return _deltainfo(
1042 return _deltainfo(
1043 dist,
1043 dist,
1044 deltalen,
1044 deltalen,
1045 data,
1045 data,
1046 deltabase,
1046 deltabase,
1047 chainbase,
1047 chainbase,
1048 chainlen,
1048 chainlen,
1049 compresseddeltalen,
1049 compresseddeltalen,
1050 snapshotdepth,
1050 snapshotdepth,
1051 )
1051 )
1052
1052
1053 def finddeltainfo(self, revinfo, fh):
1053 def finddeltainfo(self, revinfo, fh, excluded_bases=None):
1054 """Find an acceptable delta against a candidate revision
1054 """Find an acceptable delta against a candidate revision
1055
1055
1056 revinfo: information about the revision (instance of _revisioninfo)
1056 revinfo: information about the revision (instance of _revisioninfo)
1057 fh: file handle to either the .i or the .d revlog file,
1057 fh: file handle to either the .i or the .d revlog file,
1058 depending on whether it is inlined or not
1058 depending on whether it is inlined or not
1059
1059
1060 Returns the first acceptable candidate revision, as ordered by
1060 Returns the first acceptable candidate revision, as ordered by
1061 _candidategroups
1061 _candidategroups
1062
1062
1063 If no suitable deltabase is found, we return delta info for a full
1063 If no suitable deltabase is found, we return delta info for a full
1064 snapshot.
1064 snapshot.
1065
1066 `excluded_bases` is an optional set of revision that cannot be used as
1067 a delta base. Use this to recompute delta suitable in censor or strip
1068 context.
1065 """
1069 """
1066 if not revinfo.textlen:
1070 if not revinfo.textlen:
1067 return self._fullsnapshotinfo(fh, revinfo)
1071 return self._fullsnapshotinfo(fh, revinfo)
1068
1072
1073 if excluded_bases is None:
1074 excluded_bases = set()
1075
1069 # no delta for flag processor revision (see "candelta" for why)
1076 # no delta for flag processor revision (see "candelta" for why)
1070 # not calling candelta since only one revision needs test, also to
1077 # not calling candelta since only one revision needs test, also to
1071 # avoid overhead fetching flags again.
1078 # avoid overhead fetching flags again.
1072 if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
1079 if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
1073 return self._fullsnapshotinfo(fh, revinfo)
1080 return self._fullsnapshotinfo(fh, revinfo)
1074
1081
1075 cachedelta = revinfo.cachedelta
1082 cachedelta = revinfo.cachedelta
1076 p1 = revinfo.p1
1083 p1 = revinfo.p1
1077 p2 = revinfo.p2
1084 p2 = revinfo.p2
1078 revlog = self.revlog
1085 revlog = self.revlog
1079
1086
1080 deltainfo = None
1087 deltainfo = None
1081 p1r, p2r = revlog.rev(p1), revlog.rev(p2)
1088 p1r, p2r = revlog.rev(p1), revlog.rev(p2)
1082 groups = _candidategroups(
1089 groups = _candidategroups(
1083 self.revlog, revinfo.textlen, p1r, p2r, cachedelta
1090 self.revlog, revinfo.textlen, p1r, p2r, cachedelta
1084 )
1091 )
1085 candidaterevs = next(groups)
1092 candidaterevs = next(groups)
1086 while candidaterevs is not None:
1093 while candidaterevs is not None:
1087 nominateddeltas = []
1094 nominateddeltas = []
1088 if deltainfo is not None:
1095 if deltainfo is not None:
1089 # if we already found a good delta,
1096 # if we already found a good delta,
1090 # challenge it against refined candidates
1097 # challenge it against refined candidates
1091 nominateddeltas.append(deltainfo)
1098 nominateddeltas.append(deltainfo)
1092 for candidaterev in candidaterevs:
1099 for candidaterev in candidaterevs:
1100 if candidaterev in excluded_bases:
1101 continue
1093 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
1102 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
1094 if candidatedelta is not None:
1103 if candidatedelta is not None:
1095 if isgooddeltainfo(self.revlog, candidatedelta, revinfo):
1104 if isgooddeltainfo(self.revlog, candidatedelta, revinfo):
1096 nominateddeltas.append(candidatedelta)
1105 nominateddeltas.append(candidatedelta)
1097 if nominateddeltas:
1106 if nominateddeltas:
1098 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
1107 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
1099 if deltainfo is not None:
1108 if deltainfo is not None:
1100 candidaterevs = groups.send(deltainfo.base)
1109 candidaterevs = groups.send(deltainfo.base)
1101 else:
1110 else:
1102 candidaterevs = next(groups)
1111 candidaterevs = next(groups)
1103
1112
1104 if deltainfo is None:
1113 if deltainfo is None:
1105 deltainfo = self._fullsnapshotinfo(fh, revinfo)
1114 deltainfo = self._fullsnapshotinfo(fh, revinfo)
1106 return deltainfo
1115 return deltainfo
General Comments 0
You need to be logged in to leave comments. Login now