# Reconstructed from an hgweb diff view of changeset r41015:ba09db26 (default
# branch): "delta: ignore base whose chains already don't match expectations"
# Author: Boris Feld
1 # revlogdeltas.py - Logic around delta computation for revlog
1 # revlogdeltas.py - Logic around delta computation for revlog
2 #
2 #
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 # Copyright 2018 Octobus <contact@octobus.net>
4 # Copyright 2018 Octobus <contact@octobus.net>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8 """Helper class to compute deltas stored inside revlogs"""
8 """Helper class to compute deltas stored inside revlogs"""
9
9
10 from __future__ import absolute_import
10 from __future__ import absolute_import
11
11
12 import collections
12 import collections
13 import struct
13 import struct
14
14
15 # import stuff from node for others to import from revlog
15 # import stuff from node for others to import from revlog
16 from ..node import (
16 from ..node import (
17 nullrev,
17 nullrev,
18 )
18 )
19 from ..i18n import _
19 from ..i18n import _
20
20
21 from .constants import (
21 from .constants import (
22 REVIDX_ISCENSORED,
22 REVIDX_ISCENSORED,
23 REVIDX_RAWTEXT_CHANGING_FLAGS,
23 REVIDX_RAWTEXT_CHANGING_FLAGS,
24 )
24 )
25
25
26 from ..thirdparty import (
26 from ..thirdparty import (
27 attr,
27 attr,
28 )
28 )
29
29
30 from .. import (
30 from .. import (
31 error,
31 error,
32 mdiff,
32 mdiff,
33 )
33 )
34
34
# maximum <delta-chain-data>/<revision-text-length> ratio
LIMIT_DELTA2TEXT = 2
37
37
38 class _testrevlog(object):
38 class _testrevlog(object):
39 """minimalist fake revlog to use in doctests"""
39 """minimalist fake revlog to use in doctests"""
40
40
41 def __init__(self, data, density=0.5, mingap=0, snapshot=()):
41 def __init__(self, data, density=0.5, mingap=0, snapshot=()):
42 """data is an list of revision payload boundaries"""
42 """data is an list of revision payload boundaries"""
43 self._data = data
43 self._data = data
44 self._srdensitythreshold = density
44 self._srdensitythreshold = density
45 self._srmingapsize = mingap
45 self._srmingapsize = mingap
46 self._snapshot = set(snapshot)
46 self._snapshot = set(snapshot)
47 self.index = None
47 self.index = None
48
48
49 def start(self, rev):
49 def start(self, rev):
50 if rev == 0:
50 if rev == 0:
51 return 0
51 return 0
52 return self._data[rev - 1]
52 return self._data[rev - 1]
53
53
54 def end(self, rev):
54 def end(self, rev):
55 return self._data[rev]
55 return self._data[rev]
56
56
57 def length(self, rev):
57 def length(self, rev):
58 return self.end(rev) - self.start(rev)
58 return self.end(rev) - self.start(rev)
59
59
60 def __len__(self):
60 def __len__(self):
61 return len(self._data)
61 return len(self._data)
62
62
63 def issnapshot(self, rev):
63 def issnapshot(self, rev):
64 return rev in self._snapshot
64 return rev in self._snapshot
65
65
def slicechunk(revlog, revs, targetsize=None):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `revlog._srdensitythreshold`. No gap smaller than
    `revlog._srmingapsize` is skipped.

    If `targetsize` is set, no chunk larger than `targetsize` will be yielded.
    For consistency with other slicing choices, this limit won't go lower than
    `revlog._srmingapsize`.

    If individual revision chunks are larger than this limit, they will still
    be raised individually.

    >>> data = [
    ...  5, #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ]
    >>> revlog = _testrevlog(data, snapshot=range(16))

    >>> list(slicechunk(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(slicechunk(revlog, [0, 15]))
    [[0], [15]]
    >>> list(slicechunk(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]

    Slicing with a maximum chunk size
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
    [[0], [11], [13], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
    [[0], [11], [13, 15]]
    """
    if targetsize is not None:
        targetsize = max(targetsize, revlog._srmingapsize)
    # targetsize should not be specified when evaluating delta candidates:
    # * targetsize is used to ensure we stay within specification when reading,
    densityslicing = getattr(revlog.index, 'slicechunktodensity', None)
    if densityslicing is None:
        # no C-accelerated index implementation; fall back to pure python
        densityslicing = lambda x, y, z: _slicechunktodensity(revlog, x, y, z)
    for chunk in densityslicing(revs,
                                revlog._srdensitythreshold,
                                revlog._srmingapsize):
        # further split each density-selected chunk to honor targetsize
        for subchunk in _slicechunktosize(revlog, chunk, targetsize):
            yield subchunk
132
132
def _slicechunktosize(revlog, revs, targetsize=None):
    """slice revs to match the target size

    This is intended to be used on chunks that density slicing selected but
    are still too large compared to the read guarantee of revlog. This might
    happen when "minimal gap size" interrupted the slicing or when chains are
    built in a way that creates large blocks next to each other.

    >>> data = [
    ...  3, #0 (3)
    ...  5, #1 (2)
    ...  6, #2 (1)
    ...  8, #3 (2)
    ...  8, #4 (empty)
    ...  11, #5 (3)
    ...  12, #6 (1)
    ...  13, #7 (1)
    ...  14, #8 (1)
    ... ]

    == All snapshots cases ==
    >>> revlog = _testrevlog(data, snapshot=range(9))

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1, 2], [3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]

    == No Snapshot cases ==
    >>> revlog = _testrevlog(data)

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1], [2, 3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [4, 5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]

    == mixed case ==
    >>> revlog = _testrevlog(data, snapshot=[0, 1, 2])
    >>> list(_slicechunktosize(revlog, list(range(9)), 5))
    [[0, 1], [2], [3, 4, 5], [6, 7, 8]]
    """
    assert targetsize is None or 0 <= targetsize
    startdata = revlog.start(revs[0])
    enddata = revlog.end(revs[-1])
    fullspan = enddata - startdata
    # whole chunk already fits within the limit: nothing to do
    if targetsize is None or fullspan <= targetsize:
        yield revs
        return

    startrevidx = 0
    endrevidx = 1
    iterrevs = enumerate(revs)
    next(iterrevs) # skip first rev.
    # first step: get snapshots out of the way
    for idx, r in iterrevs:
        span = revlog.end(r) - startdata
        snapshot = revlog.issnapshot(r)
        if span <= targetsize and snapshot:
            # still within budget; extend the current chunk
            endrevidx = idx + 1
        else:
            chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
            if chunk:
                yield chunk
            startrevidx = idx
            startdata = revlog.start(r)
            endrevidx = idx + 1
        if not snapshot:
            break

    # for the others, we use binary slicing to quickly converge toward valid
    # chunks (otherwise, we might end up looking for start/end of many
    # revisions). This logic is not looking for the perfect slicing point, it
    # focuses on quickly converging toward valid chunks.
    nbitem = len(revs)
    while (enddata - startdata) > targetsize:
        endrevidx = nbitem
        if nbitem - startrevidx <= 1:
            break # protect against individual chunk larger than limit
        localenddata = revlog.end(revs[endrevidx - 1])
        span = localenddata - startdata
        while span > targetsize:
            if endrevidx - startrevidx <= 1:
                break # protect against individual chunk larger than limit
            # halve the candidate range until the span fits
            endrevidx -= (endrevidx - startrevidx) // 2
            localenddata = revlog.end(revs[endrevidx - 1])
            span = localenddata - startdata
        chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
        if chunk:
            yield chunk
        startrevidx = endrevidx
        startdata = revlog.start(revs[startrevidx])

    chunk = _trimchunk(revlog, revs, startrevidx)
    if chunk:
        yield chunk
286
286
def _slicechunktodensity(revlog, revs, targetdensity=0.5,
                         mingapsize=0):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
    skipped.

    >>> revlog = _testrevlog([
    ...  5, #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ])

    >>> list(_slicechunktodensity(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(_slicechunktodensity(revlog, [0, 15]))
    [[0], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           mingapsize=20))
    [[1, 2, 3, 5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95))
    [[1, 2], [5], [8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95, mingapsize=12))
    [[1, 2], [5, 8, 10, 11], [14]]
    """
    start = revlog.start
    length = revlog.length

    # zero or one revision: trivially dense enough
    if len(revs) <= 1:
        yield revs
        return

    deltachainspan = segmentspan(revlog, revs)

    if deltachainspan < mingapsize:
        yield revs
        return

    readdata = deltachainspan
    chainpayload = sum(length(r) for r in revs)

    if deltachainspan:
        density = chainpayload / float(deltachainspan)
    else:
        density = 1.0

    if density >= targetdensity:
        yield revs
        return

    # Collect the gaps between consecutive non-empty revisions
    gaps = []
    prevend = None
    for i, rev in enumerate(revs):
        revstart = start(rev)
        revlen = length(rev)

        # Skip empty revisions to form larger holes
        if revlen == 0:
            continue

        if prevend is not None:
            gapsize = revstart - prevend
            # only consider holes that are large enough
            if gapsize > mingapsize:
                gaps.append((gapsize, i))

        prevend = revstart + revlen
    # sort the gaps ascending so pop() removes the largest remaining one
    gaps.sort()

    # Collect the indices of the largest holes until the density is acceptable
    selected = []
    while gaps and density < targetdensity:
        gapsize, gapidx = gaps.pop()

        selected.append(gapidx)

        # dropping a gap shrinks the data we would actually read, which
        # raises the effective density
        readdata -= gapsize
        if readdata > 0:
            density = chainpayload / float(readdata)
        else:
            density = 1.0
    selected.sort()

    # Cut the revs at collected indices
    previdx = 0
    for idx in selected:

        chunk = _trimchunk(revlog, revs, previdx, idx)
        if chunk:
            yield chunk

        previdx = idx

    chunk = _trimchunk(revlog, revs, previdx)
    if chunk:
        yield chunk
412
412
413 def _trimchunk(revlog, revs, startidx, endidx=None):
413 def _trimchunk(revlog, revs, startidx, endidx=None):
414 """returns revs[startidx:endidx] without empty trailing revs
414 """returns revs[startidx:endidx] without empty trailing revs
415
415
416 Doctest Setup
416 Doctest Setup
417 >>> revlog = _testrevlog([
417 >>> revlog = _testrevlog([
418 ... 5, #0
418 ... 5, #0
419 ... 10, #1
419 ... 10, #1
420 ... 12, #2
420 ... 12, #2
421 ... 12, #3 (empty)
421 ... 12, #3 (empty)
422 ... 17, #4
422 ... 17, #4
423 ... 21, #5
423 ... 21, #5
424 ... 21, #6 (empty)
424 ... 21, #6 (empty)
425 ... ])
425 ... ])
426
426
427 Contiguous cases:
427 Contiguous cases:
428 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
428 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
429 [0, 1, 2, 3, 4, 5]
429 [0, 1, 2, 3, 4, 5]
430 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
430 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
431 [0, 1, 2, 3, 4]
431 [0, 1, 2, 3, 4]
432 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
432 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
433 [0, 1, 2]
433 [0, 1, 2]
434 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
434 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
435 [2]
435 [2]
436 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
436 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
437 [3, 4, 5]
437 [3, 4, 5]
438 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
438 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
439 [3, 4]
439 [3, 4]
440
440
441 Discontiguous cases:
441 Discontiguous cases:
442 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
442 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
443 [1, 3, 5]
443 [1, 3, 5]
444 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
444 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
445 [1]
445 [1]
446 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
446 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
447 [3, 5]
447 [3, 5]
448 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
448 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
449 [3, 5]
449 [3, 5]
450 """
450 """
451 length = revlog.length
451 length = revlog.length
452
452
453 if endidx is None:
453 if endidx is None:
454 endidx = len(revs)
454 endidx = len(revs)
455
455
456 # If we have a non-emtpy delta candidate, there are nothing to trim
456 # If we have a non-emtpy delta candidate, there are nothing to trim
457 if revs[endidx - 1] < len(revlog):
457 if revs[endidx - 1] < len(revlog):
458 # Trim empty revs at the end, except the very first revision of a chain
458 # Trim empty revs at the end, except the very first revision of a chain
459 while (endidx > 1
459 while (endidx > 1
460 and endidx > startidx
460 and endidx > startidx
461 and length(revs[endidx - 1]) == 0):
461 and length(revs[endidx - 1]) == 0):
462 endidx -= 1
462 endidx -= 1
463
463
464 return revs[startidx:endidx]
464 return revs[startidx:endidx]
465
465
def segmentspan(revlog, revs):
    """Get the byte span of a segment of revisions

    revs is a sorted array of revision numbers

    >>> revlog = _testrevlog([
    ...  5, #0
    ...  10, #1
    ...  12, #2
    ...  12, #3 (empty)
    ...  17, #4
    ... ])

    >>> segmentspan(revlog, [0, 1, 2, 3, 4])
    17
    >>> segmentspan(revlog, [0, 4])
    17
    >>> segmentspan(revlog, [3, 4])
    5
    >>> segmentspan(revlog, [1, 2, 3,])
    7
    >>> segmentspan(revlog, [1, 3])
    7
    """
    # an empty segment covers no bytes at all
    if not revs:
        return 0
    end = revlog.end(revs[-1])
    return end - revlog.start(revs[0])
494
494
def _textfromdelta(fh, revlog, baserev, delta, p1, p2, flags, expectednode):
    """build full text from a (base, delta) pair and other metadata"""
    # special case deltas which replace entire base; no need to decode
    # base revision. this neatly avoids censored bases, which throw when
    # they're decoded.
    hlen = struct.calcsize(">lll")
    if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
                                               len(delta) - hlen):
        fulltext = delta[hlen:]
    else:
        # deltabase is rawtext before changed by flag processors, which is
        # equivalent to non-raw text
        basetext = revlog.revision(baserev, _df=fh, raw=False)
        fulltext = mdiff.patch(basetext, delta)

    try:
        res = revlog._processflags(fulltext, flags, 'read', raw=True)
        fulltext, validatehash = res
        if validatehash:
            revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
        if flags & REVIDX_ISCENSORED:
            # a censored flag on a hash-valid text is a contradiction
            raise error.StorageError(_('node %s is not censored') %
                                     expectednode)
    except error.CensoredNodeError:
        # must pass the censored index flag to add censored revisions
        if not flags & REVIDX_ISCENSORED:
            raise
    return fulltext
523
523
@attr.s(slots=True, frozen=True)
class _deltainfo(object):
    """Immutable description of a candidate delta for a revision."""
    # distance from the base revision -- bounding it limits read I/O
    distance = attr.ib()
    # length of the delta data
    deltalen = attr.ib()
    # the delta payload itself (presumably header + diff chunks -- see
    # _textfromdelta; confirm against producers)
    data = attr.ib()
    # revision number the delta applies against
    base = attr.ib()
    # first revision of the delta chain
    chainbase = attr.ib()
    # number of deltas in the chain
    chainlen = attr.ib()
    # sum of the total size of deltas to apply -- bounding it limits CPU
    compresseddeltalen = attr.ib()
    # snapshot depth if this revision is a snapshot (None otherwise --
    # TODO confirm against callers)
    snapshotdepth = attr.ib()
534
534
def isgooddeltainfo(revlog, deltainfo, revinfo):
    """Returns True if the given delta is good. Good means that it is within
    the disk span, disk size, and chain length bounds that we know to be
    performant."""
    if deltainfo is None:
        return False

    # - 'deltainfo.distance' is the distance from the base revision --
    #   bounding it limits the amount of I/O we need to do.
    # - 'deltainfo.compresseddeltalen' is the sum of the total size of
    #   deltas we need to apply -- bounding it limits the amount of CPU
    #   we consume.

    textlen = revinfo.textlen

    # Bad delta from read span:
    #
    # Reject the delta if the span of data read is larger than the maximum
    # allowed.
    #
    # In the sparse-revlog case, we rely on the associated "sparse reading"
    # to avoid issues related to the span of data. In theory, it would be
    # possible to build a pathological revlog where the delta pattern would
    # lead to too many reads. However, this does not happen in practice at
    # all, so we skip the span check entirely.
    if not revlog._sparserevlog:
        # When no maximum span is configured, fall back to the delta's own
        # distance so the comparison cannot reject it; either way the cap
        # never drops below four times the revision text length.
        span_cap = revlog._maxdeltachainspan or deltainfo.distance
        span_cap = max(span_cap, textlen * 4)
        if deltainfo.distance > span_cap:
            return False

    # Bad delta from new delta size:
    #
    # Storing a delta larger than the target text itself would be
    # inefficient.
    if deltainfo.deltalen > textlen:
        return False

    # Bad delta from cumulated payload size:
    #
    # The sum of the deltas to apply must stay below K * text length.
    if deltainfo.compresseddeltalen > textlen * LIMIT_DELTA2TEXT:
        return False

    # Bad delta from chain length:
    #
    # Reject chains that grew past the configured maximum number of links.
    if revlog._maxchainlen and deltainfo.chainlen > revlog._maxchainlen:
        return False

    # Bad delta from intermediate snapshot size limit:
    #
    # Each snapshot level gets an exponentially shrinking size budget
    # (textlen >> depth). The limit exists to prevent endless chains of
    # intermediate deltas from being created.
    depth = deltainfo.snapshotdepth
    if depth is not None and deltainfo.deltalen > (textlen >> depth):
        return False

    # Bad delta if the new intermediate snapshot is larger than the previous
    # snapshot it is based on.
    if depth and deltainfo.deltalen > revlog.length(deltainfo.base):
        return False

    return True
603
603
# If a revision's full text is that much bigger than a base candidate full
# text's, it is very unlikely that it will produce a valid delta. We no longer
# consider these candidates.
LIMIT_BASE2TEXT = 50
608
608
def _candidategroups(revlog, textlen, p1, p2, cachedelta):
    """Provides group of revision to be tested as delta base

    This top level function focus on emitting groups with unique and worthwhile
    content. See _raw_candidate_groups for details about the group order.

    This is a generator: the caller drives it with ``send(good)``, where
    ``good`` is the base revision it accepted from the previous group (or
    None), and receives either a tuple of candidate revisions or None when
    no candidates remain.
    """
    # should we try to build a delta?
    if not (len(revlog) and revlog._storedeltachains):
        yield None
        return

    deltalength = revlog.length
    deltaparent = revlog.deltaparent
    sparse = revlog._sparserevlog
    good = None

    # cumulated payload of a chain may not exceed this many bytes
    deltas_limit = textlen * LIMIT_DELTA2TEXT

    tested = set([nullrev])
    candidates = _refinedgroups(revlog, p1, p2, cachedelta)
    while True:
        temptative = candidates.send(good)
        if temptative is None:
            break
        group = []
        for rev in temptative:
            # skip over empty delta (no need to include them in a chain)
            while (revlog._generaldelta
                   and not (rev == nullrev
                            or rev in tested
                            or deltalength(rev))):
                tested.add(rev)
                rev = deltaparent(rev)
            # no need to try a delta against nullrev, this will be done as a
            # last resort.
            if rev == nullrev:
                continue
            # filter out revision we tested already
            if rev in tested:
                continue
            tested.add(rev)
            # filter out delta base that will never produce good delta
            if deltas_limit < revlog.length(rev):
                continue
            # a base whose raw text is vastly smaller than the new text is
            # unlikely to yield a usable delta (see LIMIT_BASE2TEXT)
            if sparse and revlog.rawsize(rev) < (textlen // LIMIT_BASE2TEXT):
                continue
            # no delta for rawtext-changing revs (see "candelta" for why)
            if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
                continue
            # If we reach here, we are about to build and test a delta.
            # The delta building process will compute the chaininfo in all
            # case, since that computation is cached, it is fine to access it
            # here too.
            chainlen, chainsize = revlog._chaininfo(rev)
            # if chain will be too long, skip base
            if revlog._maxchainlen and chainlen >= revlog._maxchainlen:
                continue
            # if chain already have too much data, skip base
            if deltas_limit < chainsize:
                continue
            group.append(rev)
        if group:
            # XXX: in the sparse revlog case, group can become large,
            # impacting performances. Some bounding or slicing mecanism
            # would help to reduce this impact.
            good = yield tuple(group)
    yield None
665
676
666 def _findsnapshots(revlog, cache, start_rev):
677 def _findsnapshots(revlog, cache, start_rev):
667 """find snapshot from start_rev to tip"""
678 """find snapshot from start_rev to tip"""
668 deltaparent = revlog.deltaparent
679 deltaparent = revlog.deltaparent
669 issnapshot = revlog.issnapshot
680 issnapshot = revlog.issnapshot
670 for rev in revlog.revs(start_rev):
681 for rev in revlog.revs(start_rev):
671 if issnapshot(rev):
682 if issnapshot(rev):
672 cache[deltaparent(rev)].append(rev)
683 cache[deltaparent(rev)].append(rev)
673
684
def _refinedgroups(revlog, p1, p2, cachedelta):
    """Emit candidate-base groups, then refine around an accepted base.

    Generator driven through ``send(good)``: ``good`` is the revision the
    caller picked from the previously yielded group (or None). Yields
    tuples of candidate revisions, and None when exhausted.
    """
    good = None
    # First we try to reuse the delta contained in the bundle.
    # (or from the source revlog)
    #
    # This logic only applies to general delta repositories and can be disabled
    # through configuration. Disabling reuse source delta is useful when
    # we want to make sure we recomputed "optimal" deltas.
    if cachedelta and revlog._generaldelta and revlog._lazydeltabase:
        # Assume what we received from the server is a good choice
        # build delta will reuse the cache
        good = yield (cachedelta[0],)
        if good is not None:
            yield None
            return
    for candidates in _rawgroups(revlog, p1, p2, cachedelta):
        good = yield candidates
        if good is not None:
            break

    # If sparse revlog is enabled, we can try to refine the available deltas
    if not revlog._sparserevlog:
        yield None
        return

    # if we have a refinable value, try to refine it
    if good is not None and good not in (p1, p2) and revlog.issnapshot(good):
        # refine snapshot down: walk toward the root of the snapshot chain
        # until the caller stops accepting shallower bases
        previous = None
        while previous != good:
            previous = good
            base = revlog.deltaparent(good)
            if base == nullrev:
                break
            good = yield (base,)
        # refine snapshot up: offer the accepted snapshot's children
        #
        # XXX the _findsnapshots call can be expensive and is "duplicated" with
        # the one done in `_rawgroups`. Once we start working on performance,
        # we should make the two logics share this computation.
        snapshots = collections.defaultdict(list)
        _findsnapshots(revlog, snapshots, good + 1)
        previous = None
        while good != previous:
            previous = good
            children = tuple(sorted(c for c in snapshots[good]))
            good = yield children

    # we have found nothing
    yield None
724
735
def _rawgroups(revlog, p1, p2, cachedelta):
    """Provides group of revision to be tested as delta base

    This lower level function focuses on emitting deltas theoretically
    interesting, without looking at any practical details.

    The group order aims at providing fast or small candidates first.
    """
    gdelta = revlog._generaldelta
    sparse = revlog._sparserevlog
    curr = len(revlog)
    prev = curr - 1
    # delta chain (list of revs) for a revision, cheapest accessor form
    deltachain = lambda rev: revlog._deltachain(rev)[0]

    if gdelta:
        # exclude already lazy tested base if any
        parents = [p for p in (p1, p2) if p != nullrev]

        if not revlog._deltabothparents and len(parents) == 2:
            parents.sort()
            # To minimize the chance of having to build a fulltext,
            # pick first whichever parent is closest to us (max rev)
            yield (parents[1],)
            # then the other one (min rev) if the first did not fit
            yield (parents[0],)
        elif len(parents) > 0:
            # Test all parents (1 or 2), and keep the best candidate
            yield parents

    # NOTE(review): 'parents' is only bound inside the gdelta branch above;
    # this relies on sparse-revlog implying general delta -- confirm against
    # the revlog configuration invariants.
    if sparse and parents:
        snapshots = collections.defaultdict(list) # map: base-rev: snapshot-rev
        # See if we can use an existing snapshot in the parent chains to use as
        # a base for a new intermediate-snapshot
        #
        # search for snapshot in parents delta chain
        # map: snapshot-level: snapshot-rev
        parents_snaps = collections.defaultdict(set)
        candidate_chains = [deltachain(p) for p in parents]
        for chain in candidate_chains:
            for idx, s in enumerate(chain):
                if not revlog.issnapshot(s):
                    break
                parents_snaps[idx].add(s)
        snapfloor = min(parents_snaps[0]) + 1
        _findsnapshots(revlog, snapshots, snapfloor)
        # search for the highest "unrelated" revision
        #
        # Adding snapshots used by "unrelated" revision increase the odds we
        # reuse an independent, yet better snapshot chain.
        #
        # XXX instead of building a set of revisions, we could lazily enumerate
        # over the chains. That would be more efficient, however we stick to
        # simple code for now.
        all_revs = set()
        for chain in candidate_chains:
            all_revs.update(chain)
        other = None
        for r in revlog.revs(prev, snapfloor):
            if r not in all_revs:
                other = r
                break
        if other is not None:
            # To avoid unfair competition, we won't use unrelated intermediate
            # snapshot that are deeper than the ones from the parent delta
            # chain.
            max_depth = max(parents_snaps.keys())
            chain = deltachain(other)
            for idx, s in enumerate(chain):
                if s < snapfloor:
                    continue
                if max_depth < idx:
                    break
                if not revlog.issnapshot(s):
                    break
                parents_snaps[idx].add(s)
        # Test them as possible intermediate snapshot base
        # We test them from highest to lowest level. High level ones are more
        # likely to result in small deltas
        floor = None
        for idx, snaps in sorted(parents_snaps.items(), reverse=True):
            siblings = set()
            for s in snaps:
                siblings.update(snapshots[s])
            # Before considering making a new intermediate snapshot, we check
            # if an existing snapshot, children of base we consider, would be
            # suitable.
            #
            # It gives a chance to reuse a delta chain "unrelated" to the
            # current revision instead of starting our own. Without such
            # re-use, topological branches would keep reopening new chains.
            # Creating more and more snapshots as the repository grows.

            if floor is not None:
                # We only do this for siblings created after the one in our
                # parent's delta chain. Those created before have less chances
                # to be valid bases since our ancestors had to create a new
                # snapshot.
                siblings = [r for r in siblings if floor < r]
            yield tuple(sorted(siblings))
            # then test the base from our parent's delta chain.
            yield tuple(sorted(snaps))
            floor = min(snaps)
        # No suitable base found in the parent chain, search if any full
        # snapshots emitted since parent's base would be a suitable base for an
        # intermediate snapshot.
        #
        # It gives a chance to reuse a delta chain unrelated to the current
        # revisions instead of starting our own. Without such re-use,
        # topological branches would keep reopening new full chains. Creating
        # more and more snapshots as the repository grows.
        yield tuple(snapshots[nullrev])

    if not sparse:
        # other approach failed try against prev to hopefully save us a
        # fulltext.
        yield (prev,)
841
852
class deltacomputer(object):
    """Computes and selects delta bases for revisions added to a revlog."""

    def __init__(self, revlog):
        # the revlog we compute deltas for
        self.revlog = revlog

    def buildtext(self, revinfo, fh):
        """Builds a fulltext version of a revision

        revinfo: _revisioninfo instance that contains all needed info
        fh:      file handle to either the .i or the .d revlog file,
                 depending on whether it is inlined or not
        """
        btext = revinfo.btext
        # the fulltext is memoized in revinfo.btext[0]
        if btext[0] is not None:
            return btext[0]

        revlog = self.revlog
        cachedelta = revinfo.cachedelta
        baserev = cachedelta[0]
        delta = cachedelta[1]

        fulltext = btext[0] = _textfromdelta(fh, revlog, baserev, delta,
                                             revinfo.p1, revinfo.p2,
                                             revinfo.flags, revinfo.node)
        return fulltext

    def _builddeltadiff(self, base, revinfo, fh):
        """Return the raw (uncompressed) delta of revinfo's text against
        the text of revision ``base``."""
        revlog = self.revlog
        t = self.buildtext(revinfo, fh)
        if revlog.iscensored(base):
            # deltas based on a censored revision must replace the
            # full content in one patch, so delta works everywhere
            header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
            delta = header + t
        else:
            ptext = revlog.revision(base, _df=fh, raw=True)
            delta = mdiff.textdiff(ptext, t)

        return delta

    def _builddeltainfo(self, revinfo, base, fh):
        """Build a _deltainfo describing a delta of revinfo against ``base``,
        reusing the cached delta from revinfo when it still applies."""
        # can we use the cached delta?
        delta = None
        if revinfo.cachedelta:
            cachebase, cachediff = revinfo.cachedelta
            # check if the cached diff still applies: walk up through
            # empty (zero-length) deltas from the cached base toward ``base``
            currentbase = cachebase
            while (currentbase != nullrev
                   and currentbase != base
                   and self.revlog.length(currentbase) == 0):
                currentbase = self.revlog.deltaparent(currentbase)
            if currentbase == base:
                delta = revinfo.cachedelta[1]
        if delta is None:
            delta = self._builddeltadiff(base, revinfo, fh)
        revlog = self.revlog
        header, data = revlog.compress(delta)
        deltalen = len(header) + len(data)
        chainbase = revlog.chainbase(base)
        offset = revlog.end(len(revlog) - 1)
        # read span: from the start of the chain base to the end of this delta
        dist = deltalen + offset - revlog.start(chainbase)
        if revlog._generaldelta:
            deltabase = base
        else:
            deltabase = chainbase
        chainlen, compresseddeltalen = revlog._chaininfo(base)
        chainlen += 1
        compresseddeltalen += deltalen

        revlog = self.revlog
        snapshotdepth = None
        if deltabase == nullrev:
            snapshotdepth = 0
        elif revlog._sparserevlog and revlog.issnapshot(deltabase):
            # A delta chain should always be one full snapshot,
            # zero or more semi-snapshots, and zero or more deltas
            p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
            if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
                snapshotdepth = len(revlog._deltachain(deltabase)[0])

        return _deltainfo(dist, deltalen, (header, data), deltabase,
                          chainbase, chainlen, compresseddeltalen,
                          snapshotdepth)

    def _fullsnapshotinfo(self, fh, revinfo):
        """Build a _deltainfo storing revinfo as a full snapshot (depth 0),
        i.e. with no delta base at all."""
        curr = len(self.revlog)
        rawtext = self.buildtext(revinfo, fh)
        data = self.revlog.compress(rawtext)
        compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
        deltabase = chainbase = curr
        snapshotdepth = 0
        chainlen = 1

        return _deltainfo(dist, deltalen, data, deltabase,
                          chainbase, chainlen, compresseddeltalen,
                          snapshotdepth)

    def finddeltainfo(self, revinfo, fh):
        """Find an acceptable delta against a candidate revision

        revinfo: information about the revision (instance of _revisioninfo)
        fh:      file handle to either the .i or the .d revlog file,
                 depending on whether it is inlined or not

        Returns the first acceptable candidate revision, as ordered by
        _candidategroups

        If no suitable deltabase is found, we return delta info for a full
        snapshot.
        """
        if not revinfo.textlen:
            # empty revisions are always stored as full snapshots
            return self._fullsnapshotinfo(fh, revinfo)

        # no delta for flag processor revision (see "candelta" for why)
        # not calling candelta since only one revision needs test, also to
        # avoid overhead fetching flags again.
        if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
            return self._fullsnapshotinfo(fh, revinfo)

        cachedelta = revinfo.cachedelta
        p1 = revinfo.p1
        p2 = revinfo.p2
        revlog = self.revlog

        deltainfo = None
        p1r, p2r = revlog.rev(p1), revlog.rev(p2)
        groups = _candidategroups(self.revlog, revinfo.textlen,
                                  p1r, p2r, cachedelta)
        candidaterevs = next(groups)
        while candidaterevs is not None:
            nominateddeltas = []
            if deltainfo is not None:
                # if we already found a good delta,
                # challenge it against refined candidates
                nominateddeltas.append(deltainfo)
            for candidaterev in candidaterevs:
                candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
                if isgooddeltainfo(self.revlog, candidatedelta, revinfo):
                    nominateddeltas.append(candidatedelta)
            if nominateddeltas:
                # keep the smallest acceptable delta so far
                deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
            if deltainfo is not None:
                # feed the chosen base back so the generator can refine
                candidaterevs = groups.send(deltainfo.base)
            else:
                candidaterevs = next(groups)

        if deltainfo is None:
            deltainfo = self._fullsnapshotinfo(fh, revinfo)
        return deltainfo
General Comments 0
You need to be logged in to leave comments. Login now