##// END OF EJS Templates
revlog: limit base to rev size ratio to 500 instead of 50...
Boris Feld -
r41062:b3734779 default
parent child Browse files
Show More
@@ -1,1000 +1,1000 b''
1 # revlogdeltas.py - Logic around delta computation for revlog
1 # revlogdeltas.py - Logic around delta computation for revlog
2 #
2 #
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 # Copyright 2018 Octobus <contact@octobus.net>
4 # Copyright 2018 Octobus <contact@octobus.net>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8 """Helper class to compute deltas stored inside revlogs"""
8 """Helper class to compute deltas stored inside revlogs"""
9
9
10 from __future__ import absolute_import
10 from __future__ import absolute_import
11
11
12 import collections
12 import collections
13 import struct
13 import struct
14
14
15 # import stuff from node for others to import from revlog
15 # import stuff from node for others to import from revlog
16 from ..node import (
16 from ..node import (
17 nullrev,
17 nullrev,
18 )
18 )
19 from ..i18n import _
19 from ..i18n import _
20
20
21 from .constants import (
21 from .constants import (
22 REVIDX_ISCENSORED,
22 REVIDX_ISCENSORED,
23 REVIDX_RAWTEXT_CHANGING_FLAGS,
23 REVIDX_RAWTEXT_CHANGING_FLAGS,
24 )
24 )
25
25
26 from ..thirdparty import (
26 from ..thirdparty import (
27 attr,
27 attr,
28 )
28 )
29
29
30 from .. import (
30 from .. import (
31 error,
31 error,
32 mdiff,
32 mdiff,
33 )
33 )
34
34
35 # maximum <delta-chain-data>/<revision-text-length> ratio
35 # maximum <delta-chain-data>/<revision-text-length> ratio
36 LIMIT_DELTA2TEXT = 2
36 LIMIT_DELTA2TEXT = 2
37
37
38 class _testrevlog(object):
38 class _testrevlog(object):
39 """minimalist fake revlog to use in doctests"""
39 """minimalist fake revlog to use in doctests"""
40
40
41 def __init__(self, data, density=0.5, mingap=0, snapshot=()):
41 def __init__(self, data, density=0.5, mingap=0, snapshot=()):
42 """data is an list of revision payload boundaries"""
42 """data is an list of revision payload boundaries"""
43 self._data = data
43 self._data = data
44 self._srdensitythreshold = density
44 self._srdensitythreshold = density
45 self._srmingapsize = mingap
45 self._srmingapsize = mingap
46 self._snapshot = set(snapshot)
46 self._snapshot = set(snapshot)
47 self.index = None
47 self.index = None
48
48
49 def start(self, rev):
49 def start(self, rev):
50 if rev == 0:
50 if rev == 0:
51 return 0
51 return 0
52 return self._data[rev - 1]
52 return self._data[rev - 1]
53
53
54 def end(self, rev):
54 def end(self, rev):
55 return self._data[rev]
55 return self._data[rev]
56
56
57 def length(self, rev):
57 def length(self, rev):
58 return self.end(rev) - self.start(rev)
58 return self.end(rev) - self.start(rev)
59
59
60 def __len__(self):
60 def __len__(self):
61 return len(self._data)
61 return len(self._data)
62
62
63 def issnapshot(self, rev):
63 def issnapshot(self, rev):
64 return rev in self._snapshot
64 return rev in self._snapshot
65
65
def slicechunk(revlog, revs, targetsize=None):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `revlog._srdensitythreshold`. No gap smaller than
    `revlog._srmingapsize` is skipped.

    If `targetsize` is set, no chunk larger than `targetsize` will be yield.
    For consistency with other slicing choice, this limit won't go lower than
    `revlog._srmingapsize`.

    If individual revisions chunk are larger than this limit, they will still
    be raised individually.

    >>> data = [
    ...  5, #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ]
    >>> revlog = _testrevlog(data, snapshot=range(16))

    >>> list(slicechunk(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(slicechunk(revlog, [0, 15]))
    [[0], [15]]
    >>> list(slicechunk(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]

    Slicing with a maximum chunk size
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
    [[0], [11], [13], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
    [[0], [11], [13, 15]]
    """
    if targetsize is not None:
        # never enforce a chunk size smaller than the minimum gap size,
        # otherwise the size limit would contradict the density slicing
        targetsize = max(targetsize, revlog._srmingapsize)
    # targetsize should not be specified when evaluating delta candidates:
    # * targetsize is used to ensure we stay within specification when reading,
    densityslicing = getattr(revlog.index, 'slicechunktodensity', None)
    if densityslicing is None:
        # pure-python fallback when the index (e.g. the C implementation)
        # does not provide density slicing itself
        densityslicing = lambda x, y, z: _slicechunktodensity(revlog, x, y, z)
    for chunk in densityslicing(revs,
                                revlog._srdensitythreshold,
                                revlog._srmingapsize):
        # density slicing can still produce chunks larger than `targetsize`;
        # slice those further to honor the maximum read size
        for subchunk in _slicechunktosize(revlog, chunk, targetsize):
            yield subchunk
132
132
def _slicechunktosize(revlog, revs, targetsize=None):
    """slice revs to match the target size

    This is intended to be used on chunk that density slicing selected by that
    are still too large compared to the read guarantee of revlog. This might
    happen when "minimal gap size" interrupted the slicing or when chains are
    built in a way that create large blocks next to each other.

    >>> data = [
    ...  3, #0 (3)
    ...  5, #1 (2)
    ...  6, #2 (1)
    ...  8, #3 (2)
    ...  8, #4 (empty)
    ...  11, #5 (3)
    ...  12, #6 (1)
    ...  13, #7 (1)
    ...  14, #8 (1)
    ... ]

    == All snapshots cases ==
    >>> revlog = _testrevlog(data, snapshot=range(9))

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1, 2], [3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]

    == No Snapshot cases ==
    >>> revlog = _testrevlog(data)

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1], [2, 3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [4, 5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]

    == mixed case ==
    >>> revlog = _testrevlog(data, snapshot=[0, 1, 2])
    >>> list(_slicechunktosize(revlog, list(range(9)), 5))
    [[0, 1], [2], [3, 4, 5], [6, 7, 8]]
    """
    assert targetsize is None or 0 <= targetsize
    startdata = revlog.start(revs[0])
    enddata = revlog.end(revs[-1])
    fullspan = enddata - startdata
    # nothing to do when the whole span already fits within the target
    if targetsize is None or fullspan <= targetsize:
        yield revs
        return

    startrevidx = 0
    endrevidx = 1
    iterrevs = enumerate(revs)
    next(iterrevs) # skip first rev.
    # first step: get snapshots out of the way
    for idx, r in iterrevs:
        span = revlog.end(r) - startdata
        snapshot = revlog.issnapshot(r)
        if span <= targetsize and snapshot:
            # snapshot still fits in the current chunk; extend it
            endrevidx = idx + 1
        else:
            # flush the accumulated snapshot run and restart a chunk at `r`
            chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
            if chunk:
                yield chunk
            startrevidx = idx
            startdata = revlog.start(r)
            endrevidx = idx + 1
        if not snapshot:
            # non-snapshot revision reached: hand the rest over to the
            # binary-slicing phase below
            break

    # for the others, we use binary slicing to quickly converge toward valid
    # chunks (otherwise, we might end up looking for start/end of many
    # revisions). This logic is not looking for the perfect slicing point, it
    # focuses on quickly converging toward valid chunks.
    nbitem = len(revs)
    while (enddata - startdata) > targetsize:
        endrevidx = nbitem
        if nbitem - startrevidx <= 1:
            break # protect against individual chunk larger than limit
        localenddata = revlog.end(revs[endrevidx - 1])
        span = localenddata - startdata
        while span > targetsize:
            if endrevidx - startrevidx <= 1:
                break # protect against individual chunk larger than limit
            # halve the candidate range until it fits
            endrevidx -= (endrevidx - startrevidx) // 2
            localenddata = revlog.end(revs[endrevidx - 1])
            span = localenddata - startdata
        chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
        if chunk:
            yield chunk
        startrevidx = endrevidx
        startdata = revlog.start(revs[startrevidx])

    # emit whatever remains after the last slice point
    chunk = _trimchunk(revlog, revs, startrevidx)
    if chunk:
        yield chunk
286
286
def _slicechunktodensity(revlog, revs, targetdensity=0.5,
                         mingapsize=0):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
    skipped.

    >>> revlog = _testrevlog([
    ...  5, #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ])

    >>> list(_slicechunktodensity(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(_slicechunktodensity(revlog, [0, 15]))
    [[0], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           mingapsize=20))
    [[1, 2, 3, 5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95))
    [[1, 2], [5], [8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95, mingapsize=12))
    [[1, 2], [5, 8, 10, 11], [14]]
    """
    start = revlog.start
    length = revlog.length

    # a single revision is trivially dense enough
    if len(revs) <= 1:
        yield revs
        return

    deltachainspan = segmentspan(revlog, revs)

    # a span below the minimum gap size cannot be usefully sliced
    if deltachainspan < mingapsize:
        yield revs
        return

    readdata = deltachainspan
    chainpayload = sum(length(r) for r in revs)

    if deltachainspan:
        density = chainpayload / float(deltachainspan)
    else:
        density = 1.0

    if density >= targetdensity:
        yield revs
        return

    # Gather the gaps (as (size, index) pairs) between consecutive
    # non-empty revisions; they are sorted below so the largest can be
    # popped first.
    gaps = []
    prevend = None
    for i, rev in enumerate(revs):
        revstart = start(rev)
        revlen = length(rev)

        # Skip empty revisions to form larger holes
        if revlen == 0:
            continue

        if prevend is not None:
            gapsize = revstart - prevend
            # only consider holes that are large enough
            if gapsize > mingapsize:
                gaps.append((gapsize, i))

        prevend = revstart + revlen
    # sort the gaps to pop them from largest to small
    gaps.sort()

    # Collect the indices of the largest holes until the density is acceptable
    selected = []
    while gaps and density < targetdensity:
        # list is sorted ascending, so pop() returns the largest gap
        gapsize, gapidx = gaps.pop()

        selected.append(gapidx)

        # skipping this gap reduces the amount of data actually read,
        # which raises the density accordingly
        readdata -= gapsize
        if readdata > 0:
            density = chainpayload / float(readdata)
        else:
            density = 1.0
    selected.sort()

    # Cut the revs at collected indices
    previdx = 0
    for idx in selected:

        chunk = _trimchunk(revlog, revs, previdx, idx)
        if chunk:
            yield chunk

        previdx = idx

    chunk = _trimchunk(revlog, revs, previdx)
    if chunk:
        yield chunk
412
412
413 def _trimchunk(revlog, revs, startidx, endidx=None):
413 def _trimchunk(revlog, revs, startidx, endidx=None):
414 """returns revs[startidx:endidx] without empty trailing revs
414 """returns revs[startidx:endidx] without empty trailing revs
415
415
416 Doctest Setup
416 Doctest Setup
417 >>> revlog = _testrevlog([
417 >>> revlog = _testrevlog([
418 ... 5, #0
418 ... 5, #0
419 ... 10, #1
419 ... 10, #1
420 ... 12, #2
420 ... 12, #2
421 ... 12, #3 (empty)
421 ... 12, #3 (empty)
422 ... 17, #4
422 ... 17, #4
423 ... 21, #5
423 ... 21, #5
424 ... 21, #6 (empty)
424 ... 21, #6 (empty)
425 ... ])
425 ... ])
426
426
427 Contiguous cases:
427 Contiguous cases:
428 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
428 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
429 [0, 1, 2, 3, 4, 5]
429 [0, 1, 2, 3, 4, 5]
430 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
430 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
431 [0, 1, 2, 3, 4]
431 [0, 1, 2, 3, 4]
432 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
432 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
433 [0, 1, 2]
433 [0, 1, 2]
434 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
434 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
435 [2]
435 [2]
436 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
436 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
437 [3, 4, 5]
437 [3, 4, 5]
438 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
438 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
439 [3, 4]
439 [3, 4]
440
440
441 Discontiguous cases:
441 Discontiguous cases:
442 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
442 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
443 [1, 3, 5]
443 [1, 3, 5]
444 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
444 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
445 [1]
445 [1]
446 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
446 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
447 [3, 5]
447 [3, 5]
448 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
448 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
449 [3, 5]
449 [3, 5]
450 """
450 """
451 length = revlog.length
451 length = revlog.length
452
452
453 if endidx is None:
453 if endidx is None:
454 endidx = len(revs)
454 endidx = len(revs)
455
455
456 # If we have a non-emtpy delta candidate, there are nothing to trim
456 # If we have a non-emtpy delta candidate, there are nothing to trim
457 if revs[endidx - 1] < len(revlog):
457 if revs[endidx - 1] < len(revlog):
458 # Trim empty revs at the end, except the very first revision of a chain
458 # Trim empty revs at the end, except the very first revision of a chain
459 while (endidx > 1
459 while (endidx > 1
460 and endidx > startidx
460 and endidx > startidx
461 and length(revs[endidx - 1]) == 0):
461 and length(revs[endidx - 1]) == 0):
462 endidx -= 1
462 endidx -= 1
463
463
464 return revs[startidx:endidx]
464 return revs[startidx:endidx]
465
465
def segmentspan(revlog, revs):
    """Get the byte span of a segment of revisions

    revs is a sorted array of revision numbers

    >>> revlog = _testrevlog([
    ...  5, #0
    ...  10, #1
    ...  12, #2
    ...  12, #3 (empty)
    ...  17, #4
    ... ])

    >>> segmentspan(revlog, [0, 1, 2, 3, 4])
    17
    >>> segmentspan(revlog, [0, 4])
    17
    >>> segmentspan(revlog, [3, 4])
    5
    >>> segmentspan(revlog, [1, 2, 3,])
    7
    >>> segmentspan(revlog, [1, 3])
    7
    """
    # an empty segment covers no bytes at all
    if not revs:
        return 0
    first, last = revs[0], revs[-1]
    return revlog.end(last) - revlog.start(first)
494
494
def _textfromdelta(fh, revlog, baserev, delta, p1, p2, flags, expectednode):
    """build full text from a (base, delta) pair and other metadata

    ``fh`` is the file handle passed down to ``revlog.revision`` for reading
    the base text, ``baserev`` the revision the delta applies to, ``p1``/``p2``
    the parent nodes used for hash checking, and ``expectednode`` the node the
    rebuilt text must hash to when flag processors request validation.

    Raises error.StorageError when ``flags`` claims the revision is censored
    but decoding did not raise CensoredNodeError; re-raises CensoredNodeError
    unless the censored flag was passed.
    """
    # special case deltas which replace entire base; no need to decode
    # base revision. this neatly avoids censored bases, which throw when
    # they're decoded.
    hlen = struct.calcsize(">lll")
    if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
                                               len(delta) - hlen):
        # full-replacement delta: the payload after the header IS the text
        fulltext = delta[hlen:]
    else:
        # deltabase is rawtext before changed by flag processors, which is
        # equivalent to non-raw text
        basetext = revlog.revision(baserev, _df=fh, raw=False)
        fulltext = mdiff.patch(basetext, delta)

    try:
        res = revlog._processflags(fulltext, flags, 'read', raw=True)
        fulltext, validatehash = res
        if validatehash:
            revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
        if flags & REVIDX_ISCENSORED:
            # a truly censored revision would have raised CensoredNodeError
            # above, so reaching here means the flag is a lie
            raise error.StorageError(_('node %s is not censored') %
                                     expectednode)
    except error.CensoredNodeError:
        # must pass the censored index flag to add censored revisions
        if not flags & REVIDX_ISCENSORED:
            raise
    return fulltext
523
523
@attr.s(slots=True, frozen=True)
class _deltainfo(object):
    """Immutable record describing a candidate delta for a revision."""
    # distance from the base revision -- bounding it limits the amount of
    # I/O needed (see isgooddeltainfo)
    distance = attr.ib()
    # size of the delta itself, in bytes -- presumably post-compression;
    # TODO confirm against the producer of these objects
    deltalen = attr.ib()
    # the delta payload itself
    data = attr.ib()
    # revision number the delta applies against
    base = attr.ib()
    # first revision of the delta chain this delta extends
    chainbase = attr.ib()
    # number of deltas in the chain
    chainlen = attr.ib()
    # sum of the total size of deltas needed to rebuild the text --
    # bounding it limits the amount of CPU consumed (see isgooddeltainfo)
    compresseddeltalen = attr.ib()
    # snapshot depth when this delta forms an intermediate snapshot --
    # NOTE(review): semantics not visible in this chunk, confirm upstream
    snapshotdepth = attr.ib()
534
534
def isgooddeltainfo(revlog, deltainfo, revinfo):
    """Returns True if the given delta is good. Good means that it is within
    the disk span, disk size, and chain length bounds that we know to be
    performant."""
    # a missing candidate can never be good
    if deltainfo is None:
        return False

    # - 'deltainfo.distance' is the distance from the base revision --
    #   bounding it limits the amount of I/O we need to do.
    # - 'deltainfo.compresseddeltalen' is the sum of the total size of
    #   deltas we need to apply -- bounding it limits the amount of CPU
    #   we consume.
    fulltextlen = revinfo.textlen

    # Compute the maximum span of data we accept reading from disk; when
    # no explicit limit is configured, seed it with the candidate's own
    # distance so the span check below cannot fire on its own.
    span_limit = revlog._maxdeltachainspan
    if not span_limit:
        span_limit = deltainfo.distance
    span_limit = max(span_limit, fulltextlen * 4)

    # Bad delta from read span:
    #
    # The span of data read is larger than the maximum allowed. In the
    # sparse-revlog case, we rely on the associated "sparse reading" to
    # avoid issues related to the span of data, so the check is skipped.
    if not revlog._sparserevlog and span_limit < deltainfo.distance:
        return False

    # Bad delta from new delta size:
    #
    # Storing a delta larger than the target text would be inefficient.
    if fulltextlen < deltainfo.deltalen:
        return False

    # Bad delta from cumulated payload size:
    #
    # The sum of deltas along the chain must stay below
    # K * target text length.
    if fulltextlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
        return False

    # Bad delta from chain length:
    #
    # The number of deltas in the chain would get too high.
    chain_cap = revlog._maxchainlen
    if chain_cap and chain_cap < deltainfo.chainlen:
        return False

    # Bad delta from intermediate snapshot size limit:
    #
    # Intermediate snapshots are bounded (shrinking with depth) to
    # prevent endless chains of intermediate deltas from being created.
    depth = deltainfo.snapshotdepth
    if depth is not None and (fulltextlen >> depth) < deltainfo.deltalen:
        return False

    # Bad delta when the new intermediate snapshot would be larger than
    # the previous snapshot it is based on.
    if depth and revlog.length(deltainfo.base) < deltainfo.deltalen:
        return False

    return True
603
603
# If a revision's full text is that much bigger than a base candidate full
# text's, it is very unlikely that it will produce a valid delta. We no longer
# consider these candidates.
#
# This is a ratio: a base is skipped when its rawsize is smaller than
# textlen // LIMIT_BASE2TEXT (see _candidategroups).
LIMIT_BASE2TEXT = 500
608
608
def _candidategroups(revlog, textlen, p1, p2, cachedelta):
    """Provides group of revision to be tested as delta base

    This top level function focus on emitting groups with unique and worthwhile
    content. See _raw_candidate_groups for details about the group order.

    This is a generator using the ``send()`` protocol: after testing a
    group, the caller sends back the base it settled on (or None) so
    following groups can be refined. The generator terminates by yielding
    None.
    """
    # should we try to build a delta?
    if not (len(revlog) and revlog._storedeltachains):
        yield None
        return

    deltalength = revlog.length
    deltaparent = revlog.deltaparent
    sparse = revlog._sparserevlog
    good = None

    deltas_limit = textlen * LIMIT_DELTA2TEXT

    # nullrev is pre-seeded: delta against nullrev is the full-snapshot
    # fallback and never needs testing here.
    tested = set([nullrev])
    candidates = _refinedgroups(revlog, p1, p2, cachedelta)
    while True:
        temptative = candidates.send(good)
        if temptative is None:
            break
        group = []
        for rev in temptative:
            # skip over empty delta (no need to include them in a chain)
            while (revlog._generaldelta
                   and not (rev == nullrev
                            or rev in tested
                            or deltalength(rev))):
                tested.add(rev)
                rev = deltaparent(rev)
            # no need to try a delta against nullrev, this will be done as a
            # last resort.
            if rev == nullrev:
                continue
            # filter out revision we tested already
            if rev in tested:
                continue
            tested.add(rev)
            # filter out delta base that will never produce good delta:
            # its own stored size already exceeds the cumulated-delta budget
            if deltas_limit < revlog.length(rev):
                continue
            # filter out bases whose full text is vastly smaller than the
            # target text (ratio bound, see LIMIT_BASE2TEXT)
            if sparse and revlog.rawsize(rev) < (textlen // LIMIT_BASE2TEXT):
                continue
            # no delta for rawtext-changing revs (see "candelta" for why)
            if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
                continue
            # If we reach here, we are about to build and test a delta.
            # The delta building process will compute the chaininfo in all
            # case, since that computation is cached, it is fine to access it
            # here too.
            chainlen, chainsize = revlog._chaininfo(rev)
            # if chain will be too long, skip base
            if revlog._maxchainlen and chainlen >= revlog._maxchainlen:
                continue
            # if chain already have too much data, skip base
            if deltas_limit < chainsize:
                continue
            group.append(rev)
        if group:
            # XXX: in the sparse revlog case, group can become large,
            # impacting performances. Some bounding or slicing mecanism
            # would help to reduce this impact.
            good = yield tuple(group)
    yield None
676
676
677 def _findsnapshots(revlog, cache, start_rev):
677 def _findsnapshots(revlog, cache, start_rev):
678 """find snapshot from start_rev to tip"""
678 """find snapshot from start_rev to tip"""
679 deltaparent = revlog.deltaparent
679 deltaparent = revlog.deltaparent
680 issnapshot = revlog.issnapshot
680 issnapshot = revlog.issnapshot
681 for rev in revlog.revs(start_rev):
681 for rev in revlog.revs(start_rev):
682 if issnapshot(rev):
682 if issnapshot(rev):
683 cache[deltaparent(rev)].append(rev)
683 cache[deltaparent(rev)].append(rev)
684
684
def _refinedgroups(revlog, p1, p2, cachedelta):
    """Yield candidate-base groups, refining them from caller feedback.

    Generator driven through ``send()``: the value sent back is the base
    the caller picked from the previous group (or None). Terminates by
    yielding None.
    """
    good = None
    # First we try to reuse a the delta contained in the bundle.
    # (or from the source revlog)
    #
    # This logic only applies to general delta repositories and can be disabled
    # through configuration. Disabling reuse source delta is useful when
    # we want to make sure we recomputed "optimal" deltas.
    if cachedelta and revlog._generaldelta and revlog._lazydeltabase:
        # Assume what we received from the server is a good choice
        # build delta will reuse the cache
        good = yield (cachedelta[0],)
        if good is not None:
            yield None
            return
    for candidates in _rawgroups(revlog, p1, p2, cachedelta):
        good = yield candidates
        if good is not None:
            break

    # If sparse revlog is enabled, we can try to refine the available deltas
    if not revlog._sparserevlog:
        yield None
        return

    # if we have a refinable value, try to refine it
    if good is not None and good not in (p1, p2) and revlog.issnapshot(good):
        # refine snapshot down: walk toward the root of the snapshot
        # chain, one base at a time, as long as the caller accepts them
        previous = None
        while previous != good:
            previous = good
            base = revlog.deltaparent(good)
            if base == nullrev:
                break
            good = yield (base,)
        # refine snapshot up: try the children snapshots of the accepted
        # base, walking away from the root
        #
        # XXX the _findsnapshots call can be expensive and is "duplicated" with
        # the one done in `_rawgroups`. Once we start working on performance,
        # we should make the two logics share this computation.
        snapshots = collections.defaultdict(list)
        _findsnapshots(revlog, snapshots, good + 1)
        previous = None
        while good != previous:
            previous = good
            children = tuple(sorted(c for c in snapshots[good]))
            good = yield children

    # we have found nothing
    yield None
735
735
def _rawgroups(revlog, p1, p2, cachedelta):
    """Provides group of revision to be tested as delta base

    This lower level function focus on emitting delta theorically interresting
    without looking it any practical details.

    The group order aims at providing fast or small candidates first.
    """
    gdelta = revlog._generaldelta
    sparse = revlog._sparserevlog
    curr = len(revlog)
    prev = curr - 1
    deltachain = lambda rev: revlog._deltachain(rev)[0]

    if gdelta:
        # exclude already lazy tested base if any
        parents = [p for p in (p1, p2) if p != nullrev]

        if not revlog._deltabothparents and len(parents) == 2:
            parents.sort()
            # To minimize the chance of having to build a fulltext,
            # pick first whichever parent is closest to us (max rev)
            yield (parents[1],)
            # then the other one (min rev) if the first did not fit
            yield (parents[0],)
        elif len(parents) > 0:
            # Test all parents (1 or 2), and keep the best candidate
            yield parents

    # NOTE(review): `parents` is only bound inside the `gdelta` branch
    # above; this relies on sparse-revlog implying general delta — confirm.
    if sparse and parents:
        snapshots = collections.defaultdict(list) # map: base-rev: snapshot-rev
        # See if we can use an existing snapshot in the parent chains to use as
        # a base for a new intermediate-snapshot
        #
        # search for snapshot in parents delta chain
        # map: snapshot-level: snapshot-rev
        parents_snaps = collections.defaultdict(set)
        candidate_chains = [deltachain(p) for p in parents]
        for chain in candidate_chains:
            for idx, s in enumerate(chain):
                if not revlog.issnapshot(s):
                    break
                parents_snaps[idx].add(s)
        snapfloor = min(parents_snaps[0]) + 1
        _findsnapshots(revlog, snapshots, snapfloor)
        # search for the highest "unrelated" revision
        #
        # Adding snapshots used by "unrelated" revision increase the odd we
        # reuse an independant, yet better snapshot chain.
        #
        # XXX instead of building a set of revisions, we could lazily enumerate
        # over the chains. That would be more efficient, however we stick to
        # simple code for now.
        all_revs = set()
        for chain in candidate_chains:
            all_revs.update(chain)
        other = None
        for r in revlog.revs(prev, snapfloor):
            if r not in all_revs:
                other = r
                break
        if other is not None:
            # To avoid unfair competition, we won't use unrelated intermediate
            # snapshot that are deeper than the ones from the parent delta
            # chain.
            max_depth = max(parents_snaps.keys())
            chain = deltachain(other)
            for idx, s in enumerate(chain):
                if s < snapfloor:
                    continue
                if max_depth < idx:
                    break
                if not revlog.issnapshot(s):
                    break
                parents_snaps[idx].add(s)
        # Test them as possible intermediate snapshot base
        # We test them from highest to lowest level. High level one are more
        # likely to result in small delta
        floor = None
        for idx, snaps in sorted(parents_snaps.items(), reverse=True):
            siblings = set()
            for s in snaps:
                siblings.update(snapshots[s])
            # Before considering making a new intermediate snapshot, we check
            # if an existing snapshot, children of base we consider, would be
            # suitable.
            #
            # It give a change to reuse a delta chain "unrelated" to the
            # current revision instead of starting our own. Without such
            # re-use, topological branches would keep reopening new chains.
            # Creating more and more snapshot as the repository grow.

            if floor is not None:
                # We only do this for siblings created after the one in our
                # parent's delta chain. Those created before has less chances
                # to be valid base since our ancestors had to create a new
                # snapshot.
                siblings = [r for r in siblings if floor < r]
            yield tuple(sorted(siblings))
            # then test the base from our parent's delta chain.
            yield tuple(sorted(snaps))
            floor = min(snaps)
        # No suitable base found in the parent chain, search if any full
        # snapshots emitted since parent's base would be a suitable base for an
        # intermediate snapshot.
        #
        # It give a chance to reuse a delta chain unrelated to the current
        # revisions instead of starting our own. Without such re-use,
        # topological branches would keep reopening new full chains. Creating
        # more and more snapshot as the repository grow.
        yield tuple(snapshots[nullrev])

    if not sparse:
        # other approach failed try against prev to hopefully save us a
        # fulltext.
        yield (prev,)
852
852
class deltacomputer(object):
    """Builds candidate deltas for revisions and picks an acceptable one."""

    def __init__(self, revlog):
        self.revlog = revlog

    def buildtext(self, revinfo, fh):
        """Builds a fulltext version of a revision

        revinfo: _revisioninfo instance that contains all needed info
        fh:      file handle to either the .i or the .d revlog file,
                 depending on whether it is inlined or not

        The result is memoized in revinfo.btext[0]. When no fulltext is
        cached yet, revinfo.cachedelta must be set so the text can be
        rebuilt from the cached (base, delta) pair.
        """
        btext = revinfo.btext
        if btext[0] is not None:
            return btext[0]

        revlog = self.revlog
        cachedelta = revinfo.cachedelta
        baserev = cachedelta[0]
        delta = cachedelta[1]

        fulltext = btext[0] = _textfromdelta(fh, revlog, baserev, delta,
                                             revinfo.p1, revinfo.p2,
                                             revinfo.flags, revinfo.node)
        return fulltext

    def _builddeltadiff(self, base, revinfo, fh):
        """Return a raw delta turning `base`'s text into `revinfo`'s text."""
        revlog = self.revlog
        t = self.buildtext(revinfo, fh)
        if revlog.iscensored(base):
            # deltas based on a censored revision must replace the
            # full content in one patch, so delta works everywhere
            header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
            delta = header + t
        else:
            ptext = revlog.revision(base, _df=fh, raw=True)
            delta = mdiff.textdiff(ptext, t)

        return delta

    def _builddeltainfo(self, revinfo, base, fh):
        """Build a _deltainfo describing a delta of `revinfo` against
        revision `base`."""
        # can we use the cached delta?
        delta = None
        if revinfo.cachedelta:
            cachebase, cachediff = revinfo.cachedelta
            # check whether the cached diff still applies against `base`,
            # skipping over empty intermediate deltas
            currentbase = cachebase
            while (currentbase != nullrev
                    and currentbase != base
                    and self.revlog.length(currentbase) == 0):
                currentbase = self.revlog.deltaparent(currentbase)
            if currentbase == base:
                delta = revinfo.cachedelta[1]
        if delta is None:
            delta = self._builddeltadiff(base, revinfo, fh)
        revlog = self.revlog
        header, data = revlog.compress(delta)
        deltalen = len(header) + len(data)
        chainbase = revlog.chainbase(base)
        offset = revlog.end(len(revlog) - 1)
        # span of bytes read from the start of the chain base to the end
        # of this new delta
        dist = deltalen + offset - revlog.start(chainbase)
        if revlog._generaldelta:
            deltabase = base
        else:
            deltabase = chainbase
        chainlen, compresseddeltalen = revlog._chaininfo(base)
        chainlen += 1
        compresseddeltalen += deltalen

        revlog = self.revlog
        snapshotdepth = None
        if deltabase == nullrev:
            snapshotdepth = 0
        elif revlog._sparserevlog and revlog.issnapshot(deltabase):
            # A delta chain should always be one full snapshot,
            # zero or more semi-snapshots, and zero or more deltas
            p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
            if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
                snapshotdepth = len(revlog._deltachain(deltabase)[0])

        return _deltainfo(dist, deltalen, (header, data), deltabase,
                          chainbase, chainlen, compresseddeltalen,
                          snapshotdepth)

    def _fullsnapshotinfo(self, fh, revinfo):
        """Build a _deltainfo storing the revision as a full snapshot
        (base is the revision itself, chain of length 1)."""
        curr = len(self.revlog)
        rawtext = self.buildtext(revinfo, fh)
        data = self.revlog.compress(rawtext)
        compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
        deltabase = chainbase = curr
        snapshotdepth = 0
        chainlen = 1

        return _deltainfo(dist, deltalen, data, deltabase,
                          chainbase, chainlen, compresseddeltalen,
                          snapshotdepth)

    def finddeltainfo(self, revinfo, fh):
        """Find an acceptable delta against a candidate revision

        revinfo: information about the revision (instance of _revisioninfo)
        fh: file handle to either the .i or the .d revlog file,
            depending on whether it is inlined or not

        Returns the first acceptable candidate revision, as ordered by
        _candidategroups

        If no suitable deltabase is found, we return delta info for a full
        snapshot.
        """
        # empty revisions are always stored as full snapshots
        if not revinfo.textlen:
            return self._fullsnapshotinfo(fh, revinfo)

        # no delta for flag processor revision (see "candelta" for why)
        # not calling candelta since only one revision needs test, also to
        # avoid overhead fetching flags again.
        if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
            return self._fullsnapshotinfo(fh, revinfo)

        cachedelta = revinfo.cachedelta
        p1 = revinfo.p1
        p2 = revinfo.p2
        revlog = self.revlog

        deltainfo = None
        p1r, p2r = revlog.rev(p1), revlog.rev(p2)
        # _candidategroups is a generator: after testing a group we send()
        # back the base we settled on (or call next() when none was good)
        # so it can refine the following groups.
        groups = _candidategroups(self.revlog, revinfo.textlen,
                                  p1r, p2r, cachedelta)
        candidaterevs = next(groups)
        while candidaterevs is not None:
            nominateddeltas = []
            if deltainfo is not None:
                # if we already found a good delta,
                # challenge it against refined candidates
                nominateddeltas.append(deltainfo)
            for candidaterev in candidaterevs:
                candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
                if isgooddeltainfo(self.revlog, candidatedelta, revinfo):
                    nominateddeltas.append(candidatedelta)
            if nominateddeltas:
                # keep the candidate with the smallest compressed delta
                deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
            if deltainfo is not None:
                candidaterevs = groups.send(deltainfo.base)
            else:
                candidaterevs = next(groups)

        if deltainfo is None:
            deltainfo = self._fullsnapshotinfo(fh, revinfo)
        return deltainfo
General Comments 0
You need to be logged in to leave comments. Login now