##// END OF EJS Templates
sparse-revlog: only refine delta candidates in the sparse case (issue6006)...
Boris Feld -
r40479:bafa1c4b stable
parent child Browse files
Show More
@@ -1,939 +1,944 b''
1 # revlogdeltas.py - Logic around delta computation for revlog
1 # revlogdeltas.py - Logic around delta computation for revlog
2 #
2 #
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 # Copyright 2018 Octobus <contact@octobus.net>
4 # Copyright 2018 Octobus <contact@octobus.net>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8 """Helper class to compute deltas stored inside revlogs"""
8 """Helper class to compute deltas stored inside revlogs"""
9
9
10 from __future__ import absolute_import
10 from __future__ import absolute_import
11
11
12 import collections
12 import collections
13 import heapq
13 import heapq
14 import struct
14 import struct
15
15
16 # import stuff from node for others to import from revlog
16 # import stuff from node for others to import from revlog
17 from ..node import (
17 from ..node import (
18 nullrev,
18 nullrev,
19 )
19 )
20 from ..i18n import _
20 from ..i18n import _
21
21
22 from .constants import (
22 from .constants import (
23 REVIDX_ISCENSORED,
23 REVIDX_ISCENSORED,
24 REVIDX_RAWTEXT_CHANGING_FLAGS,
24 REVIDX_RAWTEXT_CHANGING_FLAGS,
25 )
25 )
26
26
27 from ..thirdparty import (
27 from ..thirdparty import (
28 attr,
28 attr,
29 )
29 )
30
30
31 from .. import (
31 from .. import (
32 error,
32 error,
33 mdiff,
33 mdiff,
34 )
34 )
35
35
36 # maximum <delta-chain-data>/<revision-text-length> ratio
36 # maximum <delta-chain-data>/<revision-text-length> ratio
37 LIMIT_DELTA2TEXT = 2
37 LIMIT_DELTA2TEXT = 2
38
38
class _testrevlog(object):
    """Minimalist fake revlog, just enough for the doctests in this module."""

    def __init__(self, data, density=0.5, mingap=0):
        """``data`` is a list of cumulative revision payload boundaries"""
        self._data = data
        self._srdensitythreshold = density
        self._srmingapsize = mingap

    def start(self, rev):
        # revision 0 starts at offset 0; any other revision starts where
        # its predecessor ends
        return 0 if rev == 0 else self._data[rev - 1]

    def end(self, rev):
        return self._data[rev]

    def length(self, rev):
        return self.end(rev) - self.start(rev)

    def __len__(self):
        return len(self._data)

def slicechunk(revlog, revs, deltainfo=None, targetsize=None):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `revlog._srdensitythreshold`. No gap smaller than
    `revlog._srmingapsize` is skipped.

    If `targetsize` is set, no chunk larger than `targetsize` will be yielded.
    For consistency with other slicing choices, this limit won't go lower than
    `revlog._srmingapsize`.

    If individual revision chunks are larger than this limit, they will still
    be raised individually.

    >>> revlog = _testrevlog([
    ...  5,  #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ])

    >>> list(slicechunk(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(slicechunk(revlog, [0, 15]))
    [[0], [15]]
    >>> list(slicechunk(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]

    Slicing with a maximum chunk size
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
    [[0], [11], [13], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
    [[0], [11], [13, 15]]
    """
    # targetsize should not be specified when evaluating delta candidates:
    # * targetsize is used to ensure we stay within specification when reading,
    # * deltainfo is used to pick a good delta chain when writing.
    if deltainfo is not None and targetsize is not None:
        msg = 'cannot use `targetsize` with a `deltainfo`'
        raise error.ProgrammingError(msg)
    if targetsize is not None:
        # never slice below the minimum gap size we are willing to skip
        targetsize = max(targetsize, revlog._srmingapsize)
    densitychunks = _slicechunktodensity(revlog, revs,
                                         deltainfo,
                                         revlog._srdensitythreshold,
                                         revlog._srmingapsize)
    for chunk in densitychunks:
        for subchunk in _slicechunktosize(revlog, chunk, targetsize):
            yield subchunk

def _slicechunktosize(revlog, revs, targetsize=None):
    """slice revs to match the target size

    This is intended to be used on chunks that density slicing selected but
    that are still too large compared to the read guarantee of revlogs. This
    might happen when the "minimal gap size" interrupted the slicing or when
    chains are built in a way that creates large blocks next to each other.

    >>> revlog = _testrevlog([
    ...  3,  #0 (3)
    ...  5,  #1 (2)
    ...  6,  #2 (1)
    ...  8,  #3 (2)
    ...  8,  #4 (empty)
    ...  11, #5 (3)
    ...  12, #6 (1)
    ...  13, #7 (1)
    ...  14, #8 (1)
    ... ])

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1, 2], [3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]
    """
    assert targetsize is None or 0 <= targetsize
    if targetsize is None or segmentspan(revlog, revs) <= targetsize:
        # nothing to do, the whole chunk fits within the limit
        yield revs
        return

    groupstart = 0                     # index of the first rev of the group
    groupend = 0                       # index of the last rev kept so far
    startdata = revlog.start(revs[0])  # on-disk offset of the group start
    for idx in range(1, len(revs)):
        rev = revs[idx]
        span = revlog.end(rev) - startdata
        if span <= targetsize:
            groupend = idx
        else:
            # adding this rev would exceed the target: flush current group
            # and start a new one at this rev
            chunk = _trimchunk(revlog, revs, groupstart, groupend + 1)
            if chunk:
                yield chunk
            groupstart = idx
            groupend = idx
            startdata = revlog.start(rev)
    yield _trimchunk(revlog, revs, groupstart)

206
206
def _slicechunktodensity(revlog, revs, deltainfo=None, targetdensity=0.5,
                         mingapsize=0):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    ``deltainfo`` is a _deltainfo instance of a revision that we would append
    to the top of the revlog.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
    skipped.

    >>> revlog = _testrevlog([
    ...  5,  #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ])

    >>> list(_slicechunktodensity(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(_slicechunktodensity(revlog, [0, 15]))
    [[0], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           mingapsize=20))
    [[1, 2, 3, 5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95))
    [[1, 2], [5], [8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95, mingapsize=12))
    [[1, 2], [5, 8, 10, 11], [14]]
    """
    start = revlog.start
    length = revlog.length

    if len(revs) <= 1:
        yield revs
        return

    nextrev = len(revlog)
    nextoffset = revlog.end(nextrev - 1)

    # Figure out the span and payload of the chain under consideration,
    # either from the stored revisions or from the candidate delta.
    if deltainfo is None:
        deltachainspan = segmentspan(revlog, revs)
        chainpayload = sum(length(r) for r in revs)
    else:
        deltachainspan = deltainfo.distance
        chainpayload = deltainfo.compresseddeltalen

    if deltachainspan < mingapsize:
        yield revs
        return

    readdata = deltachainspan
    density = chainpayload / float(deltachainspan) if deltachainspan else 1.0

    if density >= targetdensity:
        yield revs
        return

    # account for the candidate delta as a virtual revision on top of the
    # existing revlog
    if deltainfo is not None and deltainfo.deltalen:
        revs = list(revs)
        revs.append(nextrev)

    # Gather the gaps between consecutive non-empty revisions; sizes are
    # negated so that the heap pops the largest gap first.
    gaps = []
    prevend = None
    for i, rev in enumerate(revs):
        if rev < nextrev:
            revstart = start(rev)
            revlen = length(rev)
        else:
            # the virtual candidate revision sits right past the last one
            revstart = nextoffset
            revlen = deltainfo.deltalen

        # Skip empty revisions to form larger holes
        if revlen == 0:
            continue

        if prevend is not None:
            gapsize = revstart - prevend
            # only consider holes that are large enough
            if gapsize > mingapsize:
                gaps.append((-gapsize, i))

        prevend = revstart + revlen

    heapq.heapify(gaps)

    # Drop the largest holes one by one until the density is acceptable
    selected = []
    while gaps and density < targetdensity:
        negsize, gapidx = heapq.heappop(gaps)
        selected.append(gapidx)

        # negsize is negative (see above): removing the hole shrinks the
        # amount of data to read
        readdata += negsize
        density = chainpayload / float(readdata) if readdata > 0 else 1.0

    # Cut the revs at the collected indices, in increasing order
    previdx = 0
    for idx in sorted(selected):
        chunk = _trimchunk(revlog, revs, previdx, idx)
        if chunk:
            yield chunk
        previdx = idx

    chunk = _trimchunk(revlog, revs, previdx)
    if chunk:
        yield chunk

350
350
def _trimchunk(revlog, revs, startidx, endidx=None):
    """returns revs[startidx:endidx] without empty trailing revs

    Doctest Setup
    >>> revlog = _testrevlog([
    ...  5,  #0
    ...  10, #1
    ...  12, #2
    ...  12, #3 (empty)
    ...  17, #4
    ...  21, #5
    ...  21, #6 (empty)
    ... ])

    Contiguous cases:
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
    [0, 1, 2, 3, 4, 5]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
    [0, 1, 2, 3, 4]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
    [0, 1, 2]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
    [2]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
    [3, 4, 5]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
    [3, 4]

    Discontiguous cases:
    >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
    [1, 3, 5]
    >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
    [1]
    >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
    [3, 5]
    >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
    [3, 5]
    """
    getlength = revlog.length

    if endidx is None:
        endidx = len(revs)

    # If the last rev is a non-empty delta candidate (an index past the end
    # of the revlog), there is nothing to trim.
    if revs[endidx - 1] < len(revlog):
        # Trim empty revs at the end, but never trim away the very first
        # revision of a chain.
        while (endidx > max(1, startidx)
               and getlength(revs[endidx - 1]) == 0):
            endidx -= 1

    return revs[startidx:endidx]

403
403
def segmentspan(revlog, revs, deltainfo=None):
    """Get the byte span of a segment of revisions

    revs is a sorted array of revision numbers

    >>> revlog = _testrevlog([
    ...  5,  #0
    ...  10, #1
    ...  12, #2
    ...  12, #3 (empty)
    ...  17, #4
    ... ])

    >>> segmentspan(revlog, [0, 1, 2, 3, 4])
    17
    >>> segmentspan(revlog, [0, 4])
    17
    >>> segmentspan(revlog, [3, 4])
    5
    >>> segmentspan(revlog, [1, 2, 3,])
    7
    >>> segmentspan(revlog, [1, 3])
    7
    """
    if not revs:
        return 0
    if deltainfo is not None and len(revlog) <= revs[-1]:
        # the last "revision" is a delta candidate not yet stored in the
        # revlog; its data would be appended right past the current end
        if len(revs) == 1:
            return deltainfo.deltalen
        endbyte = revlog.end(len(revlog) - 1) + deltainfo.deltalen
    else:
        endbyte = revlog.end(revs[-1])
    return endbyte - revlog.start(revs[0])

438
438
def _textfromdelta(fh, revlog, baserev, delta, p1, p2, flags, expectednode):
    """build full text from a (base, delta) pair and other metadata"""
    # special case deltas which replace entire base; no need to decode
    # base revision. this neatly avoids censored bases, which throw when
    # they're decoded.
    hlen = struct.calcsize(">lll")
    fullreplacement = mdiff.replacediffheader(revlog.rawsize(baserev),
                                              len(delta) - hlen)
    if delta[:hlen] == fullreplacement:
        fulltext = delta[hlen:]
    else:
        # deltabase is rawtext before changed by flag processors, which is
        # equivalent to non-raw text
        basetext = revlog.revision(baserev, _df=fh, raw=False)
        fulltext = mdiff.patch(basetext, delta)

    try:
        fulltext, validatehash = revlog._processflags(fulltext, flags,
                                                      'read', raw=True)
        if validatehash:
            revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
        if flags & REVIDX_ISCENSORED:
            raise error.StorageError(_('node %s is not censored') %
                                     expectednode)
    except error.CensoredNodeError:
        # must pass the censored index flag to add censored revisions
        if not flags & REVIDX_ISCENSORED:
            raise
    return fulltext

467
467
@attr.s(slots=True, frozen=True)
class _deltainfo(object):
    # NOTE: attribute order defines the constructor argument order; do not
    # reorder these fields.
    distance = attr.ib()            # span between the delta base and the tip
    deltalen = attr.ib()            # size of the stored delta
    data = attr.ib()                # the delta payload itself
    base = attr.ib()                # revision the delta applies against
    chainbase = attr.ib()           # first revision of the delta chain
    chainlen = attr.ib()            # number of deltas in the chain
    compresseddeltalen = attr.ib()  # total compressed size of the chain
    snapshotdepth = attr.ib()       # depth if an intermediate snapshot, else None

478
478
479 def isgooddeltainfo(revlog, deltainfo, revinfo):
479 def isgooddeltainfo(revlog, deltainfo, revinfo):
480 """Returns True if the given delta is good. Good means that it is within
480 """Returns True if the given delta is good. Good means that it is within
481 the disk span, disk size, and chain length bounds that we know to be
481 the disk span, disk size, and chain length bounds that we know to be
482 performant."""
482 performant."""
483 if deltainfo is None:
483 if deltainfo is None:
484 return False
484 return False
485
485
486 # - 'deltainfo.distance' is the distance from the base revision --
486 # - 'deltainfo.distance' is the distance from the base revision --
487 # bounding it limits the amount of I/O we need to do.
487 # bounding it limits the amount of I/O we need to do.
488 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
488 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
489 # deltas we need to apply -- bounding it limits the amount of CPU
489 # deltas we need to apply -- bounding it limits the amount of CPU
490 # we consume.
490 # we consume.
491
491
492 if revlog._sparserevlog:
492 if revlog._sparserevlog:
493 # As sparse-read will be used, we can consider that the distance,
493 # As sparse-read will be used, we can consider that the distance,
494 # instead of being the span of the whole chunk,
494 # instead of being the span of the whole chunk,
495 # is the span of the largest read chunk
495 # is the span of the largest read chunk
496 base = deltainfo.base
496 base = deltainfo.base
497
497
498 if base != nullrev:
498 if base != nullrev:
499 deltachain = revlog._deltachain(base)[0]
499 deltachain = revlog._deltachain(base)[0]
500 else:
500 else:
501 deltachain = []
501 deltachain = []
502
502
503 # search for the first non-snapshot revision
503 # search for the first non-snapshot revision
504 for idx, r in enumerate(deltachain):
504 for idx, r in enumerate(deltachain):
505 if not revlog.issnapshot(r):
505 if not revlog.issnapshot(r):
506 break
506 break
507 deltachain = deltachain[idx:]
507 deltachain = deltachain[idx:]
508 chunks = slicechunk(revlog, deltachain, deltainfo)
508 chunks = slicechunk(revlog, deltachain, deltainfo)
509 all_span = [segmentspan(revlog, revs, deltainfo)
509 all_span = [segmentspan(revlog, revs, deltainfo)
510 for revs in chunks]
510 for revs in chunks]
511 distance = max(all_span)
511 distance = max(all_span)
512 else:
512 else:
513 distance = deltainfo.distance
513 distance = deltainfo.distance
514
514
515 textlen = revinfo.textlen
515 textlen = revinfo.textlen
516 defaultmax = textlen * 4
516 defaultmax = textlen * 4
517 maxdist = revlog._maxdeltachainspan
517 maxdist = revlog._maxdeltachainspan
518 if not maxdist:
518 if not maxdist:
519 maxdist = distance # ensure the conditional pass
519 maxdist = distance # ensure the conditional pass
520 maxdist = max(maxdist, defaultmax)
520 maxdist = max(maxdist, defaultmax)
521 if revlog._sparserevlog and maxdist < revlog._srmingapsize:
521 if revlog._sparserevlog and maxdist < revlog._srmingapsize:
522 # In multiple place, we are ignoring irrelevant data range below a
522 # In multiple place, we are ignoring irrelevant data range below a
523 # certain size. Be also apply this tradeoff here and relax span
523 # certain size. Be also apply this tradeoff here and relax span
524 # constraint for small enought content.
524 # constraint for small enought content.
525 maxdist = revlog._srmingapsize
525 maxdist = revlog._srmingapsize
526
526
527 # Bad delta from read span:
527 # Bad delta from read span:
528 #
528 #
529 # If the span of data read is larger than the maximum allowed.
529 # If the span of data read is larger than the maximum allowed.
530 if maxdist < distance:
530 if maxdist < distance:
531 return False
531 return False
532
532
533 # Bad delta from new delta size:
533 # Bad delta from new delta size:
534 #
534 #
535 # If the delta size is larger than the target text, storing the
535 # If the delta size is larger than the target text, storing the
536 # delta will be inefficient.
536 # delta will be inefficient.
537 if textlen < deltainfo.deltalen:
537 if textlen < deltainfo.deltalen:
538 return False
538 return False
539
539
540 # Bad delta from cumulated payload size:
540 # Bad delta from cumulated payload size:
541 #
541 #
542 # If the sum of delta get larger than K * target text length.
542 # If the sum of delta get larger than K * target text length.
543 if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
543 if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
544 return False
544 return False
545
545
546 # Bad delta from chain length:
546 # Bad delta from chain length:
547 #
547 #
548 # If the number of delta in the chain gets too high.
548 # If the number of delta in the chain gets too high.
549 if (revlog._maxchainlen
549 if (revlog._maxchainlen
550 and revlog._maxchainlen < deltainfo.chainlen):
550 and revlog._maxchainlen < deltainfo.chainlen):
551 return False
551 return False
552
552
553 # bad delta from intermediate snapshot size limit
553 # bad delta from intermediate snapshot size limit
554 #
554 #
555 # If an intermediate snapshot size is higher than the limit. The
555 # If an intermediate snapshot size is higher than the limit. The
556 # limit exist to prevent endless chain of intermediate delta to be
556 # limit exist to prevent endless chain of intermediate delta to be
557 # created.
557 # created.
558 if (deltainfo.snapshotdepth is not None and
558 if (deltainfo.snapshotdepth is not None and
559 (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen):
559 (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen):
560 return False
560 return False
561
561
562 # bad delta if new intermediate snapshot is larger than the previous
562 # bad delta if new intermediate snapshot is larger than the previous
563 # snapshot
563 # snapshot
564 if (deltainfo.snapshotdepth
564 if (deltainfo.snapshotdepth
565 and revlog.length(deltainfo.base) < deltainfo.deltalen):
565 and revlog.length(deltainfo.base) < deltainfo.deltalen):
566 return False
566 return False
567
567
568 return True
568 return True
569
569
def _candidategroups(revlog, textlen, p1, p2, cachedelta):
    """Provides groups of revisions to be tested as delta bases

    This top level function focuses on emitting groups with unique and
    worthwhile content. See _raw_candidate_groups for details about the
    group order.
    """
    # A delta-less (or empty) revlog cannot use any base at all.
    if not (len(revlog) and revlog._storedeltachains):
        yield None
        return

    length = revlog.length
    parent_of_delta = revlog.deltaparent
    chosen = None

    # cumulated payload above which a base can never yield a good delta
    payload_limit = textlen * LIMIT_DELTA2TEXT

    visited = set([nullrev])
    groups = _refinedgroups(revlog, p1, p2, cachedelta)
    while True:
        raw_group = groups.send(chosen)
        if raw_group is None:
            break
        usable = []
        for rev in raw_group:
            # walk over empty deltas, they add nothing to a chain
            while not (rev == nullrev or rev in visited or length(rev)):
                visited.add(rev)
                rev = parent_of_delta(rev)
            # ignore bases we already tried
            if rev in visited:
                continue
            visited.add(rev)
            # a base whose own stored delta already exceeds the payload
            # limit can never produce an acceptable delta
            if payload_limit < length(rev):
                continue
            # delta against nullrev is the final fallback, no need to try
            # it here
            if rev == nullrev:
                continue
            # no delta for rawtext-changing revs (see "candelta" for why)
            if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
                continue
            usable.append(rev)
        if usable:
            # XXX: in the sparse revlog case, the group can become large,
            # impacting performance. Some bounding or slicing mechanism
            # would help to reduce this impact.
            chosen = yield tuple(usable)
    yield None
620
620
def _findsnapshots(revlog, cache, start_rev):
    """Collect every snapshot from ``start_rev`` to tip into ``cache``

    ``cache`` maps a delta-parent revision to the list of snapshot
    revisions stored as a delta directly on top of it.
    """
    parent = revlog.deltaparent
    is_snap = revlog.issnapshot
    for rev in revlog.revs(start_rev):
        if is_snap(rev):
            cache[parent(rev)].append(rev)
628
628
def _refinedgroups(revlog, p1, p2, cachedelta):
    """Emit delta-base candidate groups, refining around an accepted base

    This generator is driven through ``send()``: the consumer sends back
    the base it settled on (or None) and receives the next group to try.
    """
    best = None
    # Start by trying to reuse the delta shipped in the bundle (or coming
    # from the source revlog).
    #
    # This logic only applies to general delta repositories and can be
    # disabled through configuration. Disabling source-delta reuse is
    # useful when we want to make sure we recompute "optimal" deltas.
    if cachedelta and revlog._generaldelta and revlog._lazydeltabase:
        # Assume what we received from the server is a good choice;
        # build delta will reuse the cache.
        best = yield (cachedelta[0],)
        if best is not None:
            yield None
            return
    for group in _rawgroups(revlog, p1, p2, cachedelta):
        best = yield group
        if best is not None:
            break

    # Refining the available deltas only makes sense for sparse revlogs.
    if not revlog._sparserevlog:
        yield None
        return

    # if we settled on a refinable base, try to refine it
    if best is not None and best not in (p1, p2) and revlog.issnapshot(best):
        # refine snapshot down
        last = None
        while last != best:
            last = best
            base = revlog.deltaparent(best)
            if base == nullrev:
                break
            best = yield (base,)
        # refine snapshot up
        #
        # XXX the _findsnapshots call can be expensive and is "duplicated"
        # with the one done in `_rawgroups`. Once we start working on
        # performance, we should make the two logics share this
        # computation.
        snapshots = collections.defaultdict(list)
        _findsnapshots(revlog, snapshots, best + 1)
        last = None
        while best != last:
            last = best
            children = tuple(sorted(snapshots[best]))
            best = yield children

    # we have found nothing
    yield None
678 yield None
674
679
def _rawgroups(revlog, p1, p2, cachedelta):
    """Provides groups of revisions to be tested as delta bases

    This lower level function focuses on emitting deltas that are
    theoretically interesting, without looking at any practical detail.

    The group order aims at providing fast or small candidates first.
    """
    general_delta = revlog._generaldelta
    sparse = revlog._sparserevlog
    curr = len(revlog)
    prev = curr - 1
    deltachain = lambda rev: revlog._deltachain(rev)[0]

    if general_delta:
        # exclude already lazily-tested bases, if any
        parents = [p for p in (p1, p2) if p != nullrev]

        if not revlog._deltabothparents and len(parents) == 2:
            parents.sort()
            # To minimize the chance of having to build a fulltext,
            # pick first whichever parent is closest to us (max rev),
            yield (parents[1],)
            # then the other one (min rev) if the first did not fit.
            yield (parents[0],)
        elif len(parents) > 0:
            # Test all parents (1 or 2) and keep the best candidate.
            yield parents

    if sparse and parents:
        # map: base-rev -> snapshot revs stored directly on top of it
        snapshots = collections.defaultdict(list)
        # See if an existing snapshot in the parents' chains can be used as
        # a base for a new intermediate snapshot.
        #
        # search for snapshots in the parents' delta chains
        # map: snapshot-depth -> set of snapshot revs at that depth
        parents_snaps = collections.defaultdict(set)
        candidate_chains = [deltachain(p) for p in parents]
        for chain in candidate_chains:
            for depth, s in enumerate(chain):
                if not revlog.issnapshot(s):
                    break
                parents_snaps[depth].add(s)
        snapfloor = min(parents_snaps[0]) + 1
        _findsnapshots(revlog, snapshots, snapfloor)
        # search for the highest "unrelated" revision
        #
        # Adding snapshots used by "unrelated" revisions increases the odds
        # that we reuse an independent, yet better, snapshot chain.
        #
        # XXX instead of building a set of revisions, we could lazily
        # enumerate over the chains. That would be more efficient, however
        # we stick to simple code for now.
        all_revs = set()
        for chain in candidate_chains:
            all_revs.update(chain)
        other = None
        for r in revlog.revs(prev, snapfloor):
            if r not in all_revs:
                other = r
                break
        if other is not None:
            # To avoid unfair competition, we won't use unrelated
            # intermediate snapshots that are deeper than the ones from the
            # parents' delta chains.
            max_depth = max(parents_snaps.keys())
            chain = deltachain(other)
            for depth, s in enumerate(chain):
                if s < snapfloor:
                    continue
                if max_depth < depth:
                    break
                if not revlog.issnapshot(s):
                    break
                parents_snaps[depth].add(s)
        # Test them as possible intermediate snapshot bases. We test them
        # from the highest to the lowest level: high level ones are more
        # likely to result in small deltas.
        floor = None
        for depth, snaps in sorted(parents_snaps.items(), reverse=True):
            siblings = set()
            for s in snaps:
                siblings.update(snapshots[s])
            # Before considering making a new intermediate snapshot, check
            # whether an existing snapshot, child of a base we consider,
            # would be suitable.
            #
            # This gives a chance to reuse a delta chain "unrelated" to the
            # current revision instead of starting our own. Without such
            # reuse, topological branches would keep reopening new chains,
            # creating more and more snapshots as the repository grows.

            if floor is not None:
                # We only do this for siblings created after the ones in
                # our parents' delta chains. Those created before have
                # fewer chances to be valid bases since our ancestors had
                # to create a new snapshot.
                siblings = [r for r in siblings if floor < r]
            yield tuple(sorted(siblings))
            # then test the bases from our parents' delta chains
            yield tuple(sorted(snaps))
            floor = min(snaps)
        # No suitable base found in the parents' chains; search whether any
        # full snapshot emitted since the parents' bases would be a
        # suitable base for an intermediate snapshot.
        #
        # This gives a chance to reuse a delta chain unrelated to the
        # current revision instead of starting our own. Without such reuse,
        # topological branches would keep reopening new full chains,
        # creating more and more snapshots as the repository grows.
        yield tuple(snapshots[nullrev])

    if not sparse:
        # Other approaches failed: try against prev to hopefully save us a
        # fulltext.
        yield (prev,)
795 yield (prev,)
791
796
class deltacomputer(object):
    """Compute, evaluate and select deltas for revisions added to a revlog"""

    def __init__(self, revlog):
        self.revlog = revlog

    def buildtext(self, revinfo, fh):
        """Builds a fulltext version of a revision

        revinfo: _revisioninfo instance that contains all needed info
        fh:      file handle to either the .i or the .d revlog file,
                 depending on whether it is inlined or not
        """
        btext = revinfo.btext
        # reuse the cached fulltext when available
        if btext[0] is not None:
            return btext[0]

        revlog = self.revlog
        baserev, delta = revinfo.cachedelta

        fulltext = btext[0] = _textfromdelta(fh, revlog, baserev, delta,
                                             revinfo.p1, revinfo.p2,
                                             revinfo.flags, revinfo.node)
        return fulltext

    def _builddeltadiff(self, base, revinfo, fh):
        """Return the raw delta text turning ``base`` into the new revision"""
        revlog = self.revlog
        t = self.buildtext(revinfo, fh)
        if revlog.iscensored(base):
            # Deltas based on a censored revision must replace the full
            # content in one patch, so the delta works everywhere.
            header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
            return header + t
        ptext = revlog.revision(base, _df=fh, raw=True)
        return mdiff.textdiff(ptext, t)

    def _builddeltainfo(self, revinfo, base, fh):
        """Build the _deltainfo for storing ``revinfo`` as a delta of ``base``"""
        revlog = self.revlog
        # can we reuse the cached delta?
        delta = None
        if revinfo.cachedelta:
            cachebase, cachediff = revinfo.cachedelta
            # Check whether the cached diff still applies: walk over empty
            # deltas between the cached base and the candidate base.
            currentbase = cachebase
            while (currentbase != nullrev
                   and currentbase != base
                   and self.revlog.length(currentbase) == 0):
                currentbase = self.revlog.deltaparent(currentbase)
            if currentbase == base:
                delta = cachediff
        if delta is None:
            delta = self._builddeltadiff(base, revinfo, fh)
        header, data = revlog.compress(delta)
        deltalen = len(header) + len(data)
        chainbase = revlog.chainbase(base)
        offset = revlog.end(len(revlog) - 1)
        dist = deltalen + offset - revlog.start(chainbase)
        if revlog._generaldelta:
            deltabase = base
        else:
            deltabase = chainbase
        chainlen, compresseddeltalen = revlog._chaininfo(base)
        chainlen += 1
        compresseddeltalen += deltalen

        snapshotdepth = None
        if deltabase == nullrev:
            snapshotdepth = 0
        elif revlog._sparserevlog and revlog.issnapshot(deltabase):
            # A delta chain should always be one full snapshot,
            # zero or more semi-snapshots, and zero or more deltas.
            p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
            if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
                snapshotdepth = len(revlog._deltachain(deltabase)[0])

        return _deltainfo(dist, deltalen, (header, data), deltabase,
                          chainbase, chainlen, compresseddeltalen,
                          snapshotdepth)

    def _fullsnapshotinfo(self, fh, revinfo):
        """Build the _deltainfo for storing ``revinfo`` as a full snapshot"""
        curr = len(self.revlog)
        rawtext = self.buildtext(revinfo, fh)
        data = self.revlog.compress(rawtext)
        compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
        # a full snapshot is its own base and chain start
        deltabase = chainbase = curr
        snapshotdepth = 0
        chainlen = 1

        return _deltainfo(dist, deltalen, data, deltabase,
                          chainbase, chainlen, compresseddeltalen,
                          snapshotdepth)

    def finddeltainfo(self, revinfo, fh):
        """Find an acceptable delta against a candidate revision

        revinfo: information about the revision (instance of _revisioninfo)
        fh:      file handle to either the .i or the .d revlog file,
                 depending on whether it is inlined or not

        Returns the first acceptable candidate revision, as ordered by
        _candidategroups

        If no suitable deltabase is found, we return delta info for a full
        snapshot.
        """
        if not revinfo.textlen:
            return self._fullsnapshotinfo(fh, revinfo)

        # No delta for flag-processor revisions (see "candelta" for why).
        # Not calling candelta since only one revision needs testing, and
        # to avoid the overhead of fetching flags again.
        if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
            return self._fullsnapshotinfo(fh, revinfo)

        revlog = self.revlog
        p1r = revlog.rev(revinfo.p1)
        p2r = revlog.rev(revinfo.p2)

        deltainfo = None
        groups = _candidategroups(revlog, revinfo.textlen,
                                  p1r, p2r, revinfo.cachedelta)
        candidaterevs = next(groups)
        while candidaterevs is not None:
            nominateddeltas = []
            if deltainfo is not None:
                # we already found a good delta: challenge it against the
                # refined candidates
                nominateddeltas.append(deltainfo)
            for candidaterev in candidaterevs:
                candidatedelta = self._builddeltainfo(revinfo, candidaterev,
                                                      fh)
                if isgooddeltainfo(revlog, candidatedelta, revinfo):
                    nominateddeltas.append(candidatedelta)
            if nominateddeltas:
                deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
            if deltainfo is not None:
                candidaterevs = groups.send(deltainfo.base)
            else:
                candidaterevs = next(groups)

        if deltainfo is None:
            deltainfo = self._fullsnapshotinfo(fh, revinfo)
        return deltainfo
General Comments 0
You need to be logged in to leave comments. Login now