##// END OF EJS Templates
delta: filter nullrev out first...
Boris Feld -
r40993:f960c51e default
parent child Browse files
Show More
@@ -1,981 +1,981 b''
1 # revlogdeltas.py - Logic around delta computation for revlog
1 # revlogdeltas.py - Logic around delta computation for revlog
2 #
2 #
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 # Copyright 2018 Octobus <contact@octobus.net>
4 # Copyright 2018 Octobus <contact@octobus.net>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8 """Helper class to compute deltas stored inside revlogs"""
8 """Helper class to compute deltas stored inside revlogs"""
9
9
10 from __future__ import absolute_import
10 from __future__ import absolute_import
11
11
12 import collections
12 import collections
13 import struct
13 import struct
14
14
15 # import stuff from node for others to import from revlog
15 # import stuff from node for others to import from revlog
16 from ..node import (
16 from ..node import (
17 nullrev,
17 nullrev,
18 )
18 )
19 from ..i18n import _
19 from ..i18n import _
20
20
21 from .constants import (
21 from .constants import (
22 REVIDX_ISCENSORED,
22 REVIDX_ISCENSORED,
23 REVIDX_RAWTEXT_CHANGING_FLAGS,
23 REVIDX_RAWTEXT_CHANGING_FLAGS,
24 )
24 )
25
25
26 from ..thirdparty import (
26 from ..thirdparty import (
27 attr,
27 attr,
28 )
28 )
29
29
30 from .. import (
30 from .. import (
31 error,
31 error,
32 mdiff,
32 mdiff,
33 )
33 )
34
34
35 # maximum <delta-chain-data>/<revision-text-length> ratio
35 # maximum <delta-chain-data>/<revision-text-length> ratio
36 LIMIT_DELTA2TEXT = 2
36 LIMIT_DELTA2TEXT = 2
37
37
38 class _testrevlog(object):
38 class _testrevlog(object):
39 """minimalist fake revlog to use in doctests"""
39 """minimalist fake revlog to use in doctests"""
40
40
41 def __init__(self, data, density=0.5, mingap=0, snapshot=()):
41 def __init__(self, data, density=0.5, mingap=0, snapshot=()):
42 """data is an list of revision payload boundaries"""
42 """data is an list of revision payload boundaries"""
43 self._data = data
43 self._data = data
44 self._srdensitythreshold = density
44 self._srdensitythreshold = density
45 self._srmingapsize = mingap
45 self._srmingapsize = mingap
46 self._snapshot = set(snapshot)
46 self._snapshot = set(snapshot)
47 self.index = None
47 self.index = None
48
48
49 def start(self, rev):
49 def start(self, rev):
50 if rev == 0:
50 if rev == 0:
51 return 0
51 return 0
52 return self._data[rev - 1]
52 return self._data[rev - 1]
53
53
54 def end(self, rev):
54 def end(self, rev):
55 return self._data[rev]
55 return self._data[rev]
56
56
57 def length(self, rev):
57 def length(self, rev):
58 return self.end(rev) - self.start(rev)
58 return self.end(rev) - self.start(rev)
59
59
60 def __len__(self):
60 def __len__(self):
61 return len(self._data)
61 return len(self._data)
62
62
63 def issnapshot(self, rev):
63 def issnapshot(self, rev):
64 return rev in self._snapshot
64 return rev in self._snapshot
65
65
def slicechunk(revlog, revs, targetsize=None):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `revlog._srdensitythreshold`. No gap smaller than
    `revlog._srmingapsize` is skipped.

    If `targetsize` is set, no chunk larger than `targetsize` will be yield.
    For consistency with other slicing choice, this limit won't go lower than
    `revlog._srmingapsize`.

    If individual revisions chunk are larger than this limit, they will still
    be raised individually.

    >>> data = [
    ...  5,  #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ]
    >>> revlog = _testrevlog(data, snapshot=range(16))

    >>> list(slicechunk(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(slicechunk(revlog, [0, 15]))
    [[0], [15]]
    >>> list(slicechunk(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]

    Slicing with a maximum chunk size
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
    [[0], [11], [13], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
    [[0], [11], [13, 15]]
    """
    if targetsize is not None:
        targetsize = max(targetsize, revlog._srmingapsize)
    # targetsize should not be specified when evaluating delta candidates:
    # * targetsize is used to ensure we stay within specification when reading,
    # prefer the C-implemented density slicing when the index provides one
    densityslicing = getattr(revlog.index, 'slicechunktodensity', None)
    if densityslicing is None:
        densityslicing = lambda x, y, z: _slicechunktodensity(revlog, x, y, z)
    for chunk in densityslicing(revs,
                                revlog._srdensitythreshold,
                                revlog._srmingapsize):
        # further split each density-approved chunk to honor targetsize
        for subchunk in _slicechunktosize(revlog, chunk, targetsize):
            yield subchunk
132
132
def _slicechunktosize(revlog, revs, targetsize=None):
    """slice revs to match the target size

    This is intended to be used on chunk that density slicing selected by that
    are still too large compared to the read garantee of revlog. This might
    happens when "minimal gap size" interrupted the slicing or when chain are
    built in a way that create large blocks next to each other.

    >>> data = [
    ...  3,  #0 (3)
    ...  5,  #1 (2)
    ...  6,  #2 (1)
    ...  8,  #3 (2)
    ...  8,  #4 (empty)
    ...  11, #5 (3)
    ...  12, #6 (1)
    ...  13, #7 (1)
    ...  14, #8 (1)
    ... ]

    == All snapshots cases ==
    >>> revlog = _testrevlog(data, snapshot=range(9))

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1, 2], [3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]

    == No Snapshot cases ==
    >>> revlog = _testrevlog(data)

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1], [2, 3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [4, 5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]

    == mixed case ==
    >>> revlog = _testrevlog(data, snapshot=[0, 1, 2])
    >>> list(_slicechunktosize(revlog, list(range(9)), 5))
    [[0, 1], [2], [3, 4, 5], [6, 7, 8]]
    """
    assert targetsize is None or 0 <= targetsize
    startdata = revlog.start(revs[0])
    enddata = revlog.end(revs[-1])
    fullspan = enddata - startdata
    # nothing to do when the whole span already fits the target
    if targetsize is None or fullspan <= targetsize:
        yield revs
        return

    startrevidx = 0
    endrevidx = 1
    iterrevs = enumerate(revs)
    next(iterrevs) # skip first rev.
    # first step: get snapshots out of the way
    for idx, r in iterrevs:
        span = revlog.end(r) - startdata
        snapshot = revlog.issnapshot(r)
        if span <= targetsize and snapshot:
            endrevidx = idx + 1
        else:
            # current chunk exceeded the limit (or hit a non-snapshot):
            # flush it and start a fresh one at this revision
            chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
            if chunk:
                yield chunk
            startrevidx = idx
            startdata = revlog.start(r)
            endrevidx = idx + 1
        if not snapshot:
            break

    # for the others, we use binary slicing to quickly converge toward valid
    # chunks (otherwise, we might end up looking for start/end of many
    # revisions). This logic is not looking for the perfect slicing point, it
    # focuses on quickly converging toward valid chunks.
    nbitem = len(revs)
    while (enddata - startdata) > targetsize:
        endrevidx = nbitem
        if nbitem - startrevidx <= 1:
            break # protect against individual chunk larger than limit
        localenddata = revlog.end(revs[endrevidx - 1])
        span = localenddata - startdata
        while span > targetsize:
            if endrevidx - startrevidx <= 1:
                break # protect against individual chunk larger than limit
            # halve the candidate range until the span fits
            endrevidx -= (endrevidx - startrevidx) // 2
            localenddata = revlog.end(revs[endrevidx - 1])
            span = localenddata - startdata
        chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
        if chunk:
            yield chunk
        startrevidx = endrevidx
        startdata = revlog.start(revs[startrevidx])

    # flush whatever remains past the last slicing point
    chunk = _trimchunk(revlog, revs, startrevidx)
    if chunk:
        yield chunk
286
286
def _slicechunktodensity(revlog, revs, targetdensity=0.5,
                         mingapsize=0):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
    skipped.

    >>> revlog = _testrevlog([
    ...  5,  #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ])

    >>> list(_slicechunktodensity(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(_slicechunktodensity(revlog, [0, 15]))
    [[0], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           mingapsize=20))
    [[1, 2, 3, 5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95))
    [[1, 2], [5], [8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95, mingapsize=12))
    [[1, 2], [5, 8, 10, 11], [14]]
    """
    start = revlog.start
    length = revlog.length

    # a single revision can never be split further
    if len(revs) <= 1:
        yield revs
        return

    deltachainspan = segmentspan(revlog, revs)

    if deltachainspan < mingapsize:
        yield revs
        return

    readdata = deltachainspan
    chainpayload = sum(length(r) for r in revs)

    if deltachainspan:
        density = chainpayload / float(deltachainspan)
    else:
        density = 1.0

    if density >= targetdensity:
        yield revs
        return

    # Store the gaps in a heap to have them sorted by decreasing size
    gaps = []
    prevend = None
    for i, rev in enumerate(revs):
        revstart = start(rev)
        revlen = length(rev)

        # Skip empty revisions to form larger holes
        if revlen == 0:
            continue

        if prevend is not None:
            gapsize = revstart - prevend
            # only consider holes that are large enough
            if gapsize > mingapsize:
                gaps.append((gapsize, i))

        prevend = revstart + revlen
    # sort the gaps to pop them from largest to small
    gaps.sort()

    # Collect the indices of the largest holes until the density is acceptable
    selected = []
    while gaps and density < targetdensity:
        gapsize, gapidx = gaps.pop()

        selected.append(gapidx)

        # the gap sizes are stored as negatives to be sorted decreasingly
        # by the heap
        readdata -= gapsize
        if readdata > 0:
            density = chainpayload / float(readdata)
        else:
            density = 1.0
    selected.sort()

    # Cut the revs at collected indices
    previdx = 0
    for idx in selected:

        chunk = _trimchunk(revlog, revs, previdx, idx)
        if chunk:
            yield chunk

        previdx = idx

    # emit the final chunk past the last cut point
    chunk = _trimchunk(revlog, revs, previdx)
    if chunk:
        yield chunk
412
412
413 def _trimchunk(revlog, revs, startidx, endidx=None):
413 def _trimchunk(revlog, revs, startidx, endidx=None):
414 """returns revs[startidx:endidx] without empty trailing revs
414 """returns revs[startidx:endidx] without empty trailing revs
415
415
416 Doctest Setup
416 Doctest Setup
417 >>> revlog = _testrevlog([
417 >>> revlog = _testrevlog([
418 ... 5, #0
418 ... 5, #0
419 ... 10, #1
419 ... 10, #1
420 ... 12, #2
420 ... 12, #2
421 ... 12, #3 (empty)
421 ... 12, #3 (empty)
422 ... 17, #4
422 ... 17, #4
423 ... 21, #5
423 ... 21, #5
424 ... 21, #6 (empty)
424 ... 21, #6 (empty)
425 ... ])
425 ... ])
426
426
427 Contiguous cases:
427 Contiguous cases:
428 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
428 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
429 [0, 1, 2, 3, 4, 5]
429 [0, 1, 2, 3, 4, 5]
430 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
430 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
431 [0, 1, 2, 3, 4]
431 [0, 1, 2, 3, 4]
432 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
432 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
433 [0, 1, 2]
433 [0, 1, 2]
434 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
434 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
435 [2]
435 [2]
436 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
436 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
437 [3, 4, 5]
437 [3, 4, 5]
438 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
438 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
439 [3, 4]
439 [3, 4]
440
440
441 Discontiguous cases:
441 Discontiguous cases:
442 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
442 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
443 [1, 3, 5]
443 [1, 3, 5]
444 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
444 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
445 [1]
445 [1]
446 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
446 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
447 [3, 5]
447 [3, 5]
448 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
448 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
449 [3, 5]
449 [3, 5]
450 """
450 """
451 length = revlog.length
451 length = revlog.length
452
452
453 if endidx is None:
453 if endidx is None:
454 endidx = len(revs)
454 endidx = len(revs)
455
455
456 # If we have a non-emtpy delta candidate, there are nothing to trim
456 # If we have a non-emtpy delta candidate, there are nothing to trim
457 if revs[endidx - 1] < len(revlog):
457 if revs[endidx - 1] < len(revlog):
458 # Trim empty revs at the end, except the very first revision of a chain
458 # Trim empty revs at the end, except the very first revision of a chain
459 while (endidx > 1
459 while (endidx > 1
460 and endidx > startidx
460 and endidx > startidx
461 and length(revs[endidx - 1]) == 0):
461 and length(revs[endidx - 1]) == 0):
462 endidx -= 1
462 endidx -= 1
463
463
464 return revs[startidx:endidx]
464 return revs[startidx:endidx]
465
465
def segmentspan(revlog, revs):
    """Get the byte span of a segment of revisions

    revs is a sorted array of revision numbers

    >>> revlog = _testrevlog([
    ...  5,  #0
    ...  10, #1
    ...  12, #2
    ...  12, #3 (empty)
    ...  17, #4
    ... ])

    >>> segmentspan(revlog, [0, 1, 2, 3, 4])
    17
    >>> segmentspan(revlog, [0, 4])
    17
    >>> segmentspan(revlog, [3, 4])
    5
    >>> segmentspan(revlog, [1, 2, 3,])
    7
    >>> segmentspan(revlog, [1, 3])
    7
    """
    # an empty segment reads no bytes
    if not revs:
        return 0
    # span is measured from the start of the first rev to the end of the
    # last one, gaps included
    end = revlog.end(revs[-1])
    return end - revlog.start(revs[0])
494
494
def _textfromdelta(fh, revlog, baserev, delta, p1, p2, flags, expectednode):
    """build full text from a (base, delta) pair and other metadata"""
    # special case deltas which replace entire base; no need to decode
    # base revision. this neatly avoids censored bases, which throw when
    # they're decoded.
    hlen = struct.calcsize(">lll")
    if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
                                               len(delta) - hlen):
        fulltext = delta[hlen:]
    else:
        # deltabase is rawtext before changed by flag processors, which is
        # equivalent to non-raw text
        basetext = revlog.revision(baserev, _df=fh, raw=False)
        fulltext = mdiff.patch(basetext, delta)

    try:
        res = revlog._processflags(fulltext, flags, 'read', raw=True)
        fulltext, validatehash = res
        if validatehash:
            revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
        if flags & REVIDX_ISCENSORED:
            # caller claimed censorship but the censor flag is missing
            raise error.StorageError(_('node %s is not censored') %
                                     expectednode)
    except error.CensoredNodeError:
        # must pass the censored index flag to add censored revisions
        if not flags & REVIDX_ISCENSORED:
            raise
    return fulltext
523
523
@attr.s(slots=True, frozen=True)
class _deltainfo(object):
    """Immutable record describing a candidate delta for a revision."""
    # distance from the base revision (bounds the I/O needed to restore)
    distance = attr.ib()
    # length of the delta data itself
    deltalen = attr.ib()
    # the delta payload
    data = attr.ib()
    # revision the delta applies against
    base = attr.ib()
    # first revision of the delta chain
    chainbase = attr.ib()
    # number of deltas in the chain
    chainlen = attr.ib()
    # total compressed size of all deltas in the chain
    compresseddeltalen = attr.ib()
    # snapshot depth, or None when this delta is not a snapshot
    snapshotdepth = attr.ib()
534
534
535 def isgooddeltainfo(revlog, deltainfo, revinfo):
535 def isgooddeltainfo(revlog, deltainfo, revinfo):
536 """Returns True if the given delta is good. Good means that it is within
536 """Returns True if the given delta is good. Good means that it is within
537 the disk span, disk size, and chain length bounds that we know to be
537 the disk span, disk size, and chain length bounds that we know to be
538 performant."""
538 performant."""
539 if deltainfo is None:
539 if deltainfo is None:
540 return False
540 return False
541
541
542 # - 'deltainfo.distance' is the distance from the base revision --
542 # - 'deltainfo.distance' is the distance from the base revision --
543 # bounding it limits the amount of I/O we need to do.
543 # bounding it limits the amount of I/O we need to do.
544 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
544 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
545 # deltas we need to apply -- bounding it limits the amount of CPU
545 # deltas we need to apply -- bounding it limits the amount of CPU
546 # we consume.
546 # we consume.
547
547
548 textlen = revinfo.textlen
548 textlen = revinfo.textlen
549 defaultmax = textlen * 4
549 defaultmax = textlen * 4
550 maxdist = revlog._maxdeltachainspan
550 maxdist = revlog._maxdeltachainspan
551 if not maxdist:
551 if not maxdist:
552 maxdist = deltainfo.distance # ensure the conditional pass
552 maxdist = deltainfo.distance # ensure the conditional pass
553 maxdist = max(maxdist, defaultmax)
553 maxdist = max(maxdist, defaultmax)
554
554
555 # Bad delta from read span:
555 # Bad delta from read span:
556 #
556 #
557 # If the span of data read is larger than the maximum allowed.
557 # If the span of data read is larger than the maximum allowed.
558 #
558 #
559 # In the sparse-revlog case, we rely on the associated "sparse reading"
559 # In the sparse-revlog case, we rely on the associated "sparse reading"
560 # to avoid issue related to the span of data. In theory, it would be
560 # to avoid issue related to the span of data. In theory, it would be
561 # possible to build pathological revlog where delta pattern would lead
561 # possible to build pathological revlog where delta pattern would lead
562 # to too many reads. However, they do not happen in practice at all. So
562 # to too many reads. However, they do not happen in practice at all. So
563 # we skip the span check entirely.
563 # we skip the span check entirely.
564 if not revlog._sparserevlog and maxdist < deltainfo.distance:
564 if not revlog._sparserevlog and maxdist < deltainfo.distance:
565 return False
565 return False
566
566
567 # Bad delta from new delta size:
567 # Bad delta from new delta size:
568 #
568 #
569 # If the delta size is larger than the target text, storing the
569 # If the delta size is larger than the target text, storing the
570 # delta will be inefficient.
570 # delta will be inefficient.
571 if textlen < deltainfo.deltalen:
571 if textlen < deltainfo.deltalen:
572 return False
572 return False
573
573
574 # Bad delta from cumulated payload size:
574 # Bad delta from cumulated payload size:
575 #
575 #
576 # If the sum of delta get larger than K * target text length.
576 # If the sum of delta get larger than K * target text length.
577 if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
577 if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
578 return False
578 return False
579
579
580 # Bad delta from chain length:
580 # Bad delta from chain length:
581 #
581 #
582 # If the number of delta in the chain gets too high.
582 # If the number of delta in the chain gets too high.
583 if (revlog._maxchainlen
583 if (revlog._maxchainlen
584 and revlog._maxchainlen < deltainfo.chainlen):
584 and revlog._maxchainlen < deltainfo.chainlen):
585 return False
585 return False
586
586
587 # bad delta from intermediate snapshot size limit
587 # bad delta from intermediate snapshot size limit
588 #
588 #
589 # If an intermediate snapshot size is higher than the limit. The
589 # If an intermediate snapshot size is higher than the limit. The
590 # limit exist to prevent endless chain of intermediate delta to be
590 # limit exist to prevent endless chain of intermediate delta to be
591 # created.
591 # created.
592 if (deltainfo.snapshotdepth is not None and
592 if (deltainfo.snapshotdepth is not None and
593 (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen):
593 (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen):
594 return False
594 return False
595
595
596 # bad delta if new intermediate snapshot is larger than the previous
596 # bad delta if new intermediate snapshot is larger than the previous
597 # snapshot
597 # snapshot
598 if (deltainfo.snapshotdepth
598 if (deltainfo.snapshotdepth
599 and revlog.length(deltainfo.base) < deltainfo.deltalen):
599 and revlog.length(deltainfo.base) < deltainfo.deltalen):
600 return False
600 return False
601
601
602 return True
602 return True
603
603
def _candidategroups(revlog, textlen, p1, p2, cachedelta):
    """Provides group of revision to be tested as delta base

    This top level function focus on emitting groups with unique and worthwhile
    content. See _raw_candidate_groups for details about the group order.
    """
    # should we try to build a delta?
    if not (len(revlog) and revlog._storedeltachains):
        yield None
        return

    deltalength = revlog.length
    deltaparent = revlog.deltaparent
    good = None

    deltas_limit = textlen * LIMIT_DELTA2TEXT

    tested = set([nullrev])
    candidates = _refinedgroups(revlog, p1, p2, cachedelta)
    while True:
        temptative = candidates.send(good)
        if temptative is None:
            break
        group = []
        for rev in temptative:
            # skip over empty delta (no need to include them in a chain)
            while (revlog._generaldelta
                   and not (rev == nullrev
                            or rev in tested
                            or deltalength(rev))):
                tested.add(rev)
                rev = deltaparent(rev)
            # no need to try a delta against nullrev, this will be done as a
            # last resort.
            if rev == nullrev:
                continue
            # filter out revision we tested already
            if rev in tested:
                continue
            tested.add(rev)
            # filter out delta base that will never produce good delta
            if deltas_limit < revlog.length(rev):
                continue
            # no delta for rawtext-changing revs (see "candelta" for why)
            if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
                continue
            group.append(rev)
        if group:
            # XXX: in the sparse revlog case, group can become large,
            #      impacting performances. Some bounding or slicing mecanism
            #      would help to reduce this impact.
            good = yield tuple(group)
    yield None
657
657
658 def _findsnapshots(revlog, cache, start_rev):
658 def _findsnapshots(revlog, cache, start_rev):
659 """find snapshot from start_rev to tip"""
659 """find snapshot from start_rev to tip"""
660 deltaparent = revlog.deltaparent
660 deltaparent = revlog.deltaparent
661 issnapshot = revlog.issnapshot
661 issnapshot = revlog.issnapshot
662 for rev in revlog.revs(start_rev):
662 for rev in revlog.revs(start_rev):
663 if issnapshot(rev):
663 if issnapshot(rev):
664 cache[deltaparent(rev)].append(rev)
664 cache[deltaparent(rev)].append(rev)
665
665
def _refinedgroups(revlog, p1, p2, cachedelta):
    good = None
    # First we try to reuse the delta contained in the bundle.
    # (or from the source revlog)
    #
    # This logic only applies to general delta repositories and can be disabled
    # through configuration. Disabling reuse source delta is useful when
    # we want to make sure we recomputed "optimal" deltas.
    if cachedelta and revlog._generaldelta and revlog._lazydeltabase:
        # Assume what we received from the server is a good choice
        # build delta will reuse the cache
        good = yield (cachedelta[0],)
        if good is not None:
            yield None
            return
    for candidates in _rawgroups(revlog, p1, p2, cachedelta):
        good = yield candidates
        if good is not None:
            break

    # If sparse revlog is enabled, we can try to refine the available deltas
    if not revlog._sparserevlog:
        yield None
        return

    # if we have a refinable value, try to refine it
    if good is not None and good not in (p1, p2) and revlog.issnapshot(good):
        # refine snapshot down
        previous = None
        while previous != good:
            previous = good
            base = revlog.deltaparent(good)
            if base == nullrev:
                break
            good = yield (base,)
        # refine snapshot up
        #
        # XXX the _findsnapshots call can be expensive and is "duplicated" with
        # the one done in `_rawgroups`. Once we start working on performance,
        # we should make the two logics share this computation.
        snapshots = collections.defaultdict(list)
        _findsnapshots(revlog, snapshots, good + 1)
        previous = None
        while good != previous:
            previous = good
            children = tuple(sorted(c for c in snapshots[good]))
            good = yield children

    # we have found nothing
    yield None
716
716
def _rawgroups(revlog, p1, p2, cachedelta):
    """Provides group of revision to be tested as delta base

    This lower level function focus on emitting delta theorically interresting
    without looking it any practical details.

    The group order aims at providing fast or small candidates first.
    """
    gdelta = revlog._generaldelta
    sparse = revlog._sparserevlog
    curr = len(revlog)
    prev = curr - 1
    deltachain = lambda rev: revlog._deltachain(rev)[0]

    if gdelta:
        # exclude already lazy tested base if any
        parents = [p for p in (p1, p2) if p != nullrev]

        if not revlog._deltabothparents and len(parents) == 2:
            parents.sort()
            # To minimize the chance of having to build a fulltext,
            # pick first whichever parent is closest to us (max rev)
            yield (parents[1],)
            # then the other one (min rev) if the first did not fit
            yield (parents[0],)
        elif len(parents) > 0:
            # Test all parents (1 or 2), and keep the best candidate
            yield parents

    # NOTE(review): `parents` is only bound inside the `gdelta` branch above;
    # a sparse-revlog without general delta would hit a NameError here --
    # presumably that combination cannot occur. TODO confirm with callers.
    if sparse and parents:
        snapshots = collections.defaultdict(list) # map: base-rev: snapshot-rev
        # See if we can use an existing snapshot in the parent chains to use as
        # a base for a new intermediate-snapshot
        #
        # search for snapshot in parents delta chain
        # map: snapshot-level: snapshot-rev
        parents_snaps = collections.defaultdict(set)
        candidate_chains = [deltachain(p) for p in parents]
        for chain in candidate_chains:
            for idx, s in enumerate(chain):
                if not revlog.issnapshot(s):
                    break
                parents_snaps[idx].add(s)
        snapfloor = min(parents_snaps[0]) + 1
        _findsnapshots(revlog, snapshots, snapfloor)
        # search for the highest "unrelated" revision
        #
        # Adding snapshots used by "unrelated" revision increase the odd we
        # reuse an independant, yet better snapshot chain.
        #
        # XXX instead of building a set of revisions, we could lazily enumerate
        # over the chains. That would be more efficient, however we stick to
        # simple code for now.
        all_revs = set()
        for chain in candidate_chains:
            all_revs.update(chain)
        other = None
        for r in revlog.revs(prev, snapfloor):
            if r not in all_revs:
                other = r
                break
        if other is not None:
            # To avoid unfair competition, we won't use unrelated intermediate
            # snapshot that are deeper than the ones from the parent delta
            # chain.
            max_depth = max(parents_snaps.keys())
            chain = deltachain(other)
            for idx, s in enumerate(chain):
                if s < snapfloor:
                    continue
                if max_depth < idx:
                    break
                if not revlog.issnapshot(s):
                    break
                parents_snaps[idx].add(s)
        # Test them as possible intermediate snapshot base
        # We test them from highest to lowest level. High level one are more
        # likely to result in small delta
        floor = None
        for idx, snaps in sorted(parents_snaps.items(), reverse=True):
            siblings = set()
            for s in snaps:
                siblings.update(snapshots[s])
            # Before considering making a new intermediate snapshot, we check
            # if an existing snapshot, children of base we consider, would be
            # suitable.
            #
            # It give a change to reuse a delta chain "unrelated" to the
            # current revision instead of starting our own. Without such
            # re-use, topological branches would keep reopening new chains.
            # Creating more and more snapshot as the repository grow.

            if floor is not None:
                # We only do this for siblings created after the one in our
                # parent's delta chain. Those created before has less chances
                # to be valid base since our ancestors had to create a new
                # snapshot.
                siblings = [r for r in siblings if floor < r]
            yield tuple(sorted(siblings))
            # then test the base from our parent's delta chain.
            yield tuple(sorted(snaps))
            floor = min(snaps)
        # No suitable base found in the parent chain, search if any full
        # snapshots emitted since parent's base would be a suitable base for an
        # intermediate snapshot.
        #
        # It give a chance to reuse a delta chain unrelated to the current
        # revisions instead of starting our own. Without such re-use,
        # topological branches would keep reopening new full chains. Creating
        # more and more snapshot as the repository grow.
        yield tuple(snapshots[nullrev])

    if not sparse:
        # other approach failed try against prev to hopefully save us a
        # fulltext.
        yield (prev,)
833
833
class deltacomputer(object):
    """Compute and select delta bases for revisions added to a revlog."""

    def __init__(self, revlog):
        self.revlog = revlog

    def buildtext(self, revinfo, fh):
        """Builds a fulltext version of a revision

        revinfo: _revisioninfo instance that contains all needed info
        fh:      file handle to either the .i or the .d revlog file,
                 depending on whether it is inlined or not
        """
        btext = revinfo.btext
        if btext[0] is not None:
            return btext[0]

        revlog = self.revlog
        cachedelta = revinfo.cachedelta
        baserev = cachedelta[0]
        delta = cachedelta[1]

        fulltext = btext[0] = _textfromdelta(fh, revlog, baserev, delta,
                                             revinfo.p1, revinfo.p2,
                                             revinfo.flags, revinfo.node)
        return fulltext

    def _builddeltadiff(self, base, revinfo, fh):
        # Produce the raw delta text of the incoming revision against `base`.
        revlog = self.revlog
        t = self.buildtext(revinfo, fh)
        if revlog.iscensored(base):
            # deltas based on a censored revision must replace the
            # full content in one patch, so delta works everywhere
            header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
            delta = header + t
        else:
            ptext = revlog.revision(base, _df=fh, raw=True)
            delta = mdiff.textdiff(ptext, t)

        return delta

    def _builddeltainfo(self, revinfo, base, fh):
        # can we use the cached delta?
        delta = None
        if revinfo.cachedelta:
            cachebase, cachediff = revinfo.cachedelta
            # check if the diff still apply
            currentbase = cachebase
            while (currentbase != nullrev
                   and currentbase != base
                   and self.revlog.length(currentbase) == 0):
                currentbase = self.revlog.deltaparent(currentbase)
            if currentbase == base:
                delta = revinfo.cachedelta[1]
        if delta is None:
            delta = self._builddeltadiff(base, revinfo, fh)
        revlog = self.revlog
        header, data = revlog.compress(delta)
        deltalen = len(header) + len(data)
        chainbase = revlog.chainbase(base)
        offset = revlog.end(len(revlog) - 1)
        dist = deltalen + offset - revlog.start(chainbase)
        if revlog._generaldelta:
            deltabase = base
        else:
            deltabase = chainbase
        chainlen, compresseddeltalen = revlog._chaininfo(base)
        chainlen += 1
        compresseddeltalen += deltalen

        revlog = self.revlog
        snapshotdepth = None
        if deltabase == nullrev:
            snapshotdepth = 0
        elif revlog._sparserevlog and revlog.issnapshot(deltabase):
            # A delta chain should always be one full snapshot,
            # zero or more semi-snapshots, and zero or more deltas
            p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
            if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
                snapshotdepth = len(revlog._deltachain(deltabase)[0])

        return _deltainfo(dist, deltalen, (header, data), deltabase,
                          chainbase, chainlen, compresseddeltalen,
                          snapshotdepth)

    def _fullsnapshotinfo(self, fh, revinfo):
        # Delta info for storing the revision as a level-0 (full) snapshot.
        curr = len(self.revlog)
        rawtext = self.buildtext(revinfo, fh)
        data = self.revlog.compress(rawtext)
        compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
        deltabase = chainbase = curr
        snapshotdepth = 0
        chainlen = 1

        return _deltainfo(dist, deltalen, data, deltabase,
                          chainbase, chainlen, compresseddeltalen,
                          snapshotdepth)

    def finddeltainfo(self, revinfo, fh):
        """Find an acceptable delta against a candidate revision

        revinfo: information about the revision (instance of _revisioninfo)
        fh:      file handle to either the .i or the .d revlog file,
                 depending on whether it is inlined or not

        Returns the first acceptable candidate revision, as ordered by
        _candidategroups

        If no suitable deltabase is found, we return delta info for a full
        snapshot.
        """
        if not revinfo.textlen:
            return self._fullsnapshotinfo(fh, revinfo)

        # no delta for flag processor revision (see "candelta" for why)
        # not calling candelta since only one revision needs test, also to
        # avoid overhead fetching flags again.
        if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
            return self._fullsnapshotinfo(fh, revinfo)

        cachedelta = revinfo.cachedelta
        p1 = revinfo.p1
        p2 = revinfo.p2
        revlog = self.revlog

        deltainfo = None
        p1r, p2r = revlog.rev(p1), revlog.rev(p2)
        groups = _candidategroups(self.revlog, revinfo.textlen,
                                  p1r, p2r, cachedelta)
        candidaterevs = next(groups)
        while candidaterevs is not None:
            nominateddeltas = []
            if deltainfo is not None:
                # if we already found a good delta,
                # challenge it against refined candidates
                nominateddeltas.append(deltainfo)
            for candidaterev in candidaterevs:
                candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
                if isgooddeltainfo(self.revlog, candidatedelta, revinfo):
                    nominateddeltas.append(candidatedelta)
            if nominateddeltas:
                deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
            if deltainfo is not None:
                candidaterevs = groups.send(deltainfo.base)
            else:
                candidaterevs = next(groups)

        if deltainfo is None:
            deltainfo = self._fullsnapshotinfo(fh, revinfo)
        return deltainfo
General Comments 0
You need to be logged in to leave comments. Login now