##// END OF EJS Templates
sparse-revlog: drop unused deltainfo parameter from _slicechunktodensity...
Boris Feld -
r40640:3ac23dad default
parent child Browse files
Show More
@@ -1,925 +1,902 b''
1 # revlogdeltas.py - Logic around delta computation for revlog
1 # revlogdeltas.py - Logic around delta computation for revlog
2 #
2 #
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 # Copyright 2018 Octobus <contact@octobus.net>
4 # Copyright 2018 Octobus <contact@octobus.net>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8 """Helper class to compute deltas stored inside revlogs"""
8 """Helper class to compute deltas stored inside revlogs"""
9
9
10 from __future__ import absolute_import
10 from __future__ import absolute_import
11
11
12 import collections
12 import collections
13 import heapq
13 import heapq
14 import struct
14 import struct
15
15
16 # import stuff from node for others to import from revlog
16 # import stuff from node for others to import from revlog
17 from ..node import (
17 from ..node import (
18 nullrev,
18 nullrev,
19 )
19 )
20 from ..i18n import _
20 from ..i18n import _
21
21
22 from .constants import (
22 from .constants import (
23 REVIDX_ISCENSORED,
23 REVIDX_ISCENSORED,
24 REVIDX_RAWTEXT_CHANGING_FLAGS,
24 REVIDX_RAWTEXT_CHANGING_FLAGS,
25 )
25 )
26
26
27 from ..thirdparty import (
27 from ..thirdparty import (
28 attr,
28 attr,
29 )
29 )
30
30
31 from .. import (
31 from .. import (
32 error,
32 error,
33 mdiff,
33 mdiff,
34 )
34 )
35
35
36 # maximum <delta-chain-data>/<revision-text-length> ratio
36 # maximum <delta-chain-data>/<revision-text-length> ratio
37 LIMIT_DELTA2TEXT = 2
37 LIMIT_DELTA2TEXT = 2
38
38
39 class _testrevlog(object):
39 class _testrevlog(object):
40 """minimalist fake revlog to use in doctests"""
40 """minimalist fake revlog to use in doctests"""
41
41
42 def __init__(self, data, density=0.5, mingap=0):
42 def __init__(self, data, density=0.5, mingap=0):
43 """data is an list of revision payload boundaries"""
43 """data is an list of revision payload boundaries"""
44 self._data = data
44 self._data = data
45 self._srdensitythreshold = density
45 self._srdensitythreshold = density
46 self._srmingapsize = mingap
46 self._srmingapsize = mingap
47
47
48 def start(self, rev):
48 def start(self, rev):
49 if rev == 0:
49 if rev == 0:
50 return 0
50 return 0
51 return self._data[rev - 1]
51 return self._data[rev - 1]
52
52
53 def end(self, rev):
53 def end(self, rev):
54 return self._data[rev]
54 return self._data[rev]
55
55
56 def length(self, rev):
56 def length(self, rev):
57 return self.end(rev) - self.start(rev)
57 return self.end(rev) - self.start(rev)
58
58
59 def __len__(self):
59 def __len__(self):
60 return len(self._data)
60 return len(self._data)
61
61
62 def slicechunk(revlog, revs, deltainfo=None, targetsize=None):
62 def slicechunk(revlog, revs, targetsize=None):
63 """slice revs to reduce the amount of unrelated data to be read from disk.
63 """slice revs to reduce the amount of unrelated data to be read from disk.
64
64
65 ``revs`` is sliced into groups that should be read in one time.
65 ``revs`` is sliced into groups that should be read in one time.
66 Assume that revs are sorted.
66 Assume that revs are sorted.
67
67
68 The initial chunk is sliced until the overall density (payload/chunks-span
68 The initial chunk is sliced until the overall density (payload/chunks-span
69 ratio) is above `revlog._srdensitythreshold`. No gap smaller than
69 ratio) is above `revlog._srdensitythreshold`. No gap smaller than
70 `revlog._srmingapsize` is skipped.
70 `revlog._srmingapsize` is skipped.
71
71
72 If `targetsize` is set, no chunk larger than `targetsize` will be yield.
72 If `targetsize` is set, no chunk larger than `targetsize` will be yield.
73 For consistency with other slicing choice, this limit won't go lower than
73 For consistency with other slicing choice, this limit won't go lower than
74 `revlog._srmingapsize`.
74 `revlog._srmingapsize`.
75
75
76 If individual revisions chunk are larger than this limit, they will still
76 If individual revisions chunk are larger than this limit, they will still
77 be raised individually.
77 be raised individually.
78
78
79 >>> revlog = _testrevlog([
79 >>> revlog = _testrevlog([
80 ... 5, #00 (5)
80 ... 5, #00 (5)
81 ... 10, #01 (5)
81 ... 10, #01 (5)
82 ... 12, #02 (2)
82 ... 12, #02 (2)
83 ... 12, #03 (empty)
83 ... 12, #03 (empty)
84 ... 27, #04 (15)
84 ... 27, #04 (15)
85 ... 31, #05 (4)
85 ... 31, #05 (4)
86 ... 31, #06 (empty)
86 ... 31, #06 (empty)
87 ... 42, #07 (11)
87 ... 42, #07 (11)
88 ... 47, #08 (5)
88 ... 47, #08 (5)
89 ... 47, #09 (empty)
89 ... 47, #09 (empty)
90 ... 48, #10 (1)
90 ... 48, #10 (1)
91 ... 51, #11 (3)
91 ... 51, #11 (3)
92 ... 74, #12 (23)
92 ... 74, #12 (23)
93 ... 85, #13 (11)
93 ... 85, #13 (11)
94 ... 86, #14 (1)
94 ... 86, #14 (1)
95 ... 91, #15 (5)
95 ... 91, #15 (5)
96 ... ])
96 ... ])
97
97
98 >>> list(slicechunk(revlog, list(range(16))))
98 >>> list(slicechunk(revlog, list(range(16))))
99 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
99 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
100 >>> list(slicechunk(revlog, [0, 15]))
100 >>> list(slicechunk(revlog, [0, 15]))
101 [[0], [15]]
101 [[0], [15]]
102 >>> list(slicechunk(revlog, [0, 11, 15]))
102 >>> list(slicechunk(revlog, [0, 11, 15]))
103 [[0], [11], [15]]
103 [[0], [11], [15]]
104 >>> list(slicechunk(revlog, [0, 11, 13, 15]))
104 >>> list(slicechunk(revlog, [0, 11, 13, 15]))
105 [[0], [11, 13, 15]]
105 [[0], [11, 13, 15]]
106 >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
106 >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
107 [[1, 2], [5, 8, 10, 11], [14]]
107 [[1, 2], [5, 8, 10, 11], [14]]
108
108
109 Slicing with a maximum chunk size
109 Slicing with a maximum chunk size
110 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
110 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
111 [[0], [11], [13], [15]]
111 [[0], [11], [13], [15]]
112 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
112 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
113 [[0], [11], [13, 15]]
113 [[0], [11], [13, 15]]
114 """
114 """
115 if targetsize is not None:
115 if targetsize is not None:
116 targetsize = max(targetsize, revlog._srmingapsize)
116 targetsize = max(targetsize, revlog._srmingapsize)
117 # targetsize should not be specified when evaluating delta candidates:
117 # targetsize should not be specified when evaluating delta candidates:
118 # * targetsize is used to ensure we stay within specification when reading,
118 # * targetsize is used to ensure we stay within specification when reading,
119 # * deltainfo is used to pick are good delta chain when writing.
120 if not (deltainfo is None or targetsize is None):
121 msg = 'cannot use `targetsize` with a `deltainfo`'
122 raise error.ProgrammingError(msg)
123 for chunk in _slicechunktodensity(revlog, revs,
119 for chunk in _slicechunktodensity(revlog, revs,
124 deltainfo,
125 revlog._srdensitythreshold,
120 revlog._srdensitythreshold,
126 revlog._srmingapsize):
121 revlog._srmingapsize):
127 for subchunk in _slicechunktosize(revlog, chunk, targetsize):
122 for subchunk in _slicechunktosize(revlog, chunk, targetsize):
128 yield subchunk
123 yield subchunk
129
124
130 def _slicechunktosize(revlog, revs, targetsize=None):
125 def _slicechunktosize(revlog, revs, targetsize=None):
131 """slice revs to match the target size
126 """slice revs to match the target size
132
127
133 This is intended to be used on chunk that density slicing selected by that
128 This is intended to be used on chunk that density slicing selected by that
134 are still too large compared to the read garantee of revlog. This might
129 are still too large compared to the read garantee of revlog. This might
135 happens when "minimal gap size" interrupted the slicing or when chain are
130 happens when "minimal gap size" interrupted the slicing or when chain are
136 built in a way that create large blocks next to each other.
131 built in a way that create large blocks next to each other.
137
132
138 >>> revlog = _testrevlog([
133 >>> revlog = _testrevlog([
139 ... 3, #0 (3)
134 ... 3, #0 (3)
140 ... 5, #1 (2)
135 ... 5, #1 (2)
141 ... 6, #2 (1)
136 ... 6, #2 (1)
142 ... 8, #3 (2)
137 ... 8, #3 (2)
143 ... 8, #4 (empty)
138 ... 8, #4 (empty)
144 ... 11, #5 (3)
139 ... 11, #5 (3)
145 ... 12, #6 (1)
140 ... 12, #6 (1)
146 ... 13, #7 (1)
141 ... 13, #7 (1)
147 ... 14, #8 (1)
142 ... 14, #8 (1)
148 ... ])
143 ... ])
149
144
150 Cases where chunk is already small enough
145 Cases where chunk is already small enough
151 >>> list(_slicechunktosize(revlog, [0], 3))
146 >>> list(_slicechunktosize(revlog, [0], 3))
152 [[0]]
147 [[0]]
153 >>> list(_slicechunktosize(revlog, [6, 7], 3))
148 >>> list(_slicechunktosize(revlog, [6, 7], 3))
154 [[6, 7]]
149 [[6, 7]]
155 >>> list(_slicechunktosize(revlog, [0], None))
150 >>> list(_slicechunktosize(revlog, [0], None))
156 [[0]]
151 [[0]]
157 >>> list(_slicechunktosize(revlog, [6, 7], None))
152 >>> list(_slicechunktosize(revlog, [6, 7], None))
158 [[6, 7]]
153 [[6, 7]]
159
154
160 cases where we need actual slicing
155 cases where we need actual slicing
161 >>> list(_slicechunktosize(revlog, [0, 1], 3))
156 >>> list(_slicechunktosize(revlog, [0, 1], 3))
162 [[0], [1]]
157 [[0], [1]]
163 >>> list(_slicechunktosize(revlog, [1, 3], 3))
158 >>> list(_slicechunktosize(revlog, [1, 3], 3))
164 [[1], [3]]
159 [[1], [3]]
165 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
160 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
166 [[1, 2], [3]]
161 [[1, 2], [3]]
167 >>> list(_slicechunktosize(revlog, [3, 5], 3))
162 >>> list(_slicechunktosize(revlog, [3, 5], 3))
168 [[3], [5]]
163 [[3], [5]]
169 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
164 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
170 [[3], [5]]
165 [[3], [5]]
171 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
166 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
172 [[5], [6, 7, 8]]
167 [[5], [6, 7, 8]]
173 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
168 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
174 [[0], [1, 2], [3], [5], [6, 7, 8]]
169 [[0], [1, 2], [3], [5], [6, 7, 8]]
175
170
176 Case with too large individual chunk (must return valid chunk)
171 Case with too large individual chunk (must return valid chunk)
177 >>> list(_slicechunktosize(revlog, [0, 1], 2))
172 >>> list(_slicechunktosize(revlog, [0, 1], 2))
178 [[0], [1]]
173 [[0], [1]]
179 >>> list(_slicechunktosize(revlog, [1, 3], 1))
174 >>> list(_slicechunktosize(revlog, [1, 3], 1))
180 [[1], [3]]
175 [[1], [3]]
181 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
176 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
182 [[3], [5]]
177 [[3], [5]]
183 """
178 """
184 assert targetsize is None or 0 <= targetsize
179 assert targetsize is None or 0 <= targetsize
185 if targetsize is None or segmentspan(revlog, revs) <= targetsize:
180 if targetsize is None or segmentspan(revlog, revs) <= targetsize:
186 yield revs
181 yield revs
187 return
182 return
188
183
189 startrevidx = 0
184 startrevidx = 0
190 startdata = revlog.start(revs[0])
185 startdata = revlog.start(revs[0])
191 endrevidx = 0
186 endrevidx = 0
192 iterrevs = enumerate(revs)
187 iterrevs = enumerate(revs)
193 next(iterrevs) # skip first rev.
188 next(iterrevs) # skip first rev.
194 for idx, r in iterrevs:
189 for idx, r in iterrevs:
195 span = revlog.end(r) - startdata
190 span = revlog.end(r) - startdata
196 if span <= targetsize:
191 if span <= targetsize:
197 endrevidx = idx
192 endrevidx = idx
198 else:
193 else:
199 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx + 1)
194 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx + 1)
200 if chunk:
195 if chunk:
201 yield chunk
196 yield chunk
202 startrevidx = idx
197 startrevidx = idx
203 startdata = revlog.start(r)
198 startdata = revlog.start(r)
204 endrevidx = idx
199 endrevidx = idx
205 yield _trimchunk(revlog, revs, startrevidx)
200 yield _trimchunk(revlog, revs, startrevidx)
206
201
207 def _slicechunktodensity(revlog, revs, deltainfo=None, targetdensity=0.5,
202 def _slicechunktodensity(revlog, revs, targetdensity=0.5,
208 mingapsize=0):
203 mingapsize=0):
209 """slice revs to reduce the amount of unrelated data to be read from disk.
204 """slice revs to reduce the amount of unrelated data to be read from disk.
210
205
211 ``revs`` is sliced into groups that should be read in one time.
206 ``revs`` is sliced into groups that should be read in one time.
212 Assume that revs are sorted.
207 Assume that revs are sorted.
213
208
214 ``deltainfo`` is a _deltainfo instance of a revision that we would append
215 to the top of the revlog.
216
217 The initial chunk is sliced until the overall density (payload/chunks-span
209 The initial chunk is sliced until the overall density (payload/chunks-span
218 ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
210 ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
219 skipped.
211 skipped.
220
212
221 >>> revlog = _testrevlog([
213 >>> revlog = _testrevlog([
222 ... 5, #00 (5)
214 ... 5, #00 (5)
223 ... 10, #01 (5)
215 ... 10, #01 (5)
224 ... 12, #02 (2)
216 ... 12, #02 (2)
225 ... 12, #03 (empty)
217 ... 12, #03 (empty)
226 ... 27, #04 (15)
218 ... 27, #04 (15)
227 ... 31, #05 (4)
219 ... 31, #05 (4)
228 ... 31, #06 (empty)
220 ... 31, #06 (empty)
229 ... 42, #07 (11)
221 ... 42, #07 (11)
230 ... 47, #08 (5)
222 ... 47, #08 (5)
231 ... 47, #09 (empty)
223 ... 47, #09 (empty)
232 ... 48, #10 (1)
224 ... 48, #10 (1)
233 ... 51, #11 (3)
225 ... 51, #11 (3)
234 ... 74, #12 (23)
226 ... 74, #12 (23)
235 ... 85, #13 (11)
227 ... 85, #13 (11)
236 ... 86, #14 (1)
228 ... 86, #14 (1)
237 ... 91, #15 (5)
229 ... 91, #15 (5)
238 ... ])
230 ... ])
239
231
240 >>> list(_slicechunktodensity(revlog, list(range(16))))
232 >>> list(_slicechunktodensity(revlog, list(range(16))))
241 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
233 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
242 >>> list(_slicechunktodensity(revlog, [0, 15]))
234 >>> list(_slicechunktodensity(revlog, [0, 15]))
243 [[0], [15]]
235 [[0], [15]]
244 >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
236 >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
245 [[0], [11], [15]]
237 [[0], [11], [15]]
246 >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
238 >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
247 [[0], [11, 13, 15]]
239 [[0], [11, 13, 15]]
248 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
240 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
249 [[1, 2], [5, 8, 10, 11], [14]]
241 [[1, 2], [5, 8, 10, 11], [14]]
250 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
242 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
251 ... mingapsize=20))
243 ... mingapsize=20))
252 [[1, 2, 3, 5, 8, 10, 11], [14]]
244 [[1, 2, 3, 5, 8, 10, 11], [14]]
253 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
245 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
254 ... targetdensity=0.95))
246 ... targetdensity=0.95))
255 [[1, 2], [5], [8, 10, 11], [14]]
247 [[1, 2], [5], [8, 10, 11], [14]]
256 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
248 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
257 ... targetdensity=0.95, mingapsize=12))
249 ... targetdensity=0.95, mingapsize=12))
258 [[1, 2], [5, 8, 10, 11], [14]]
250 [[1, 2], [5, 8, 10, 11], [14]]
259 """
251 """
260 start = revlog.start
252 start = revlog.start
261 length = revlog.length
253 length = revlog.length
262
254
263 if len(revs) <= 1:
255 if len(revs) <= 1:
264 yield revs
256 yield revs
265 return
257 return
266
258
267 nextrev = len(revlog)
259 deltachainspan = segmentspan(revlog, revs)
268 nextoffset = revlog.end(nextrev - 1)
260 chainpayload = sum(length(r) for r in revs)
269
270 if deltainfo is None:
271 deltachainspan = segmentspan(revlog, revs)
272 chainpayload = sum(length(r) for r in revs)
273 else:
274 deltachainspan = deltainfo.distance
275 chainpayload = deltainfo.compresseddeltalen
276
261
277 if deltachainspan < mingapsize:
262 if deltachainspan < mingapsize:
278 yield revs
263 yield revs
279 return
264 return
280
265
281 readdata = deltachainspan
266 readdata = deltachainspan
282
267
283 if deltachainspan:
268 if deltachainspan:
284 density = chainpayload / float(deltachainspan)
269 density = chainpayload / float(deltachainspan)
285 else:
270 else:
286 density = 1.0
271 density = 1.0
287
272
288 if density >= targetdensity:
273 if density >= targetdensity:
289 yield revs
274 yield revs
290 return
275 return
291
276
292 if deltainfo is not None and deltainfo.deltalen:
293 revs = list(revs)
294 revs.append(nextrev)
295
296 # Store the gaps in a heap to have them sorted by decreasing size
277 # Store the gaps in a heap to have them sorted by decreasing size
297 gapsheap = []
278 gapsheap = []
298 heapq.heapify(gapsheap)
279 heapq.heapify(gapsheap)
299 prevend = None
280 prevend = None
300 for i, rev in enumerate(revs):
281 for i, rev in enumerate(revs):
301 if rev < nextrev:
282 revstart = start(rev)
302 revstart = start(rev)
283 revlen = length(rev)
303 revlen = length(rev)
304 else:
305 revstart = nextoffset
306 revlen = deltainfo.deltalen
307
284
308 # Skip empty revisions to form larger holes
285 # Skip empty revisions to form larger holes
309 if revlen == 0:
286 if revlen == 0:
310 continue
287 continue
311
288
312 if prevend is not None:
289 if prevend is not None:
313 gapsize = revstart - prevend
290 gapsize = revstart - prevend
314 # only consider holes that are large enough
291 # only consider holes that are large enough
315 if gapsize > mingapsize:
292 if gapsize > mingapsize:
316 heapq.heappush(gapsheap, (-gapsize, i))
293 heapq.heappush(gapsheap, (-gapsize, i))
317
294
318 prevend = revstart + revlen
295 prevend = revstart + revlen
319
296
320 # Collect the indices of the largest holes until the density is acceptable
297 # Collect the indices of the largest holes until the density is acceptable
321 indicesheap = []
298 indicesheap = []
322 heapq.heapify(indicesheap)
299 heapq.heapify(indicesheap)
323 while gapsheap and density < targetdensity:
300 while gapsheap and density < targetdensity:
324 oppgapsize, gapidx = heapq.heappop(gapsheap)
301 oppgapsize, gapidx = heapq.heappop(gapsheap)
325
302
326 heapq.heappush(indicesheap, gapidx)
303 heapq.heappush(indicesheap, gapidx)
327
304
328 # the gap sizes are stored as negatives to be sorted decreasingly
305 # the gap sizes are stored as negatives to be sorted decreasingly
329 # by the heap
306 # by the heap
330 readdata -= (-oppgapsize)
307 readdata -= (-oppgapsize)
331 if readdata > 0:
308 if readdata > 0:
332 density = chainpayload / float(readdata)
309 density = chainpayload / float(readdata)
333 else:
310 else:
334 density = 1.0
311 density = 1.0
335
312
336 # Cut the revs at collected indices
313 # Cut the revs at collected indices
337 previdx = 0
314 previdx = 0
338 while indicesheap:
315 while indicesheap:
339 idx = heapq.heappop(indicesheap)
316 idx = heapq.heappop(indicesheap)
340
317
341 chunk = _trimchunk(revlog, revs, previdx, idx)
318 chunk = _trimchunk(revlog, revs, previdx, idx)
342 if chunk:
319 if chunk:
343 yield chunk
320 yield chunk
344
321
345 previdx = idx
322 previdx = idx
346
323
347 chunk = _trimchunk(revlog, revs, previdx)
324 chunk = _trimchunk(revlog, revs, previdx)
348 if chunk:
325 if chunk:
349 yield chunk
326 yield chunk
350
327
351 def _trimchunk(revlog, revs, startidx, endidx=None):
328 def _trimchunk(revlog, revs, startidx, endidx=None):
352 """returns revs[startidx:endidx] without empty trailing revs
329 """returns revs[startidx:endidx] without empty trailing revs
353
330
354 Doctest Setup
331 Doctest Setup
355 >>> revlog = _testrevlog([
332 >>> revlog = _testrevlog([
356 ... 5, #0
333 ... 5, #0
357 ... 10, #1
334 ... 10, #1
358 ... 12, #2
335 ... 12, #2
359 ... 12, #3 (empty)
336 ... 12, #3 (empty)
360 ... 17, #4
337 ... 17, #4
361 ... 21, #5
338 ... 21, #5
362 ... 21, #6 (empty)
339 ... 21, #6 (empty)
363 ... ])
340 ... ])
364
341
365 Contiguous cases:
342 Contiguous cases:
366 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
343 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
367 [0, 1, 2, 3, 4, 5]
344 [0, 1, 2, 3, 4, 5]
368 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
345 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
369 [0, 1, 2, 3, 4]
346 [0, 1, 2, 3, 4]
370 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
347 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
371 [0, 1, 2]
348 [0, 1, 2]
372 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
349 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
373 [2]
350 [2]
374 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
351 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
375 [3, 4, 5]
352 [3, 4, 5]
376 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
353 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
377 [3, 4]
354 [3, 4]
378
355
379 Discontiguous cases:
356 Discontiguous cases:
380 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
357 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
381 [1, 3, 5]
358 [1, 3, 5]
382 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
359 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
383 [1]
360 [1]
384 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
361 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
385 [3, 5]
362 [3, 5]
386 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
363 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
387 [3, 5]
364 [3, 5]
388 """
365 """
389 length = revlog.length
366 length = revlog.length
390
367
391 if endidx is None:
368 if endidx is None:
392 endidx = len(revs)
369 endidx = len(revs)
393
370
394 # If we have a non-emtpy delta candidate, there are nothing to trim
371 # If we have a non-emtpy delta candidate, there are nothing to trim
395 if revs[endidx - 1] < len(revlog):
372 if revs[endidx - 1] < len(revlog):
396 # Trim empty revs at the end, except the very first revision of a chain
373 # Trim empty revs at the end, except the very first revision of a chain
397 while (endidx > 1
374 while (endidx > 1
398 and endidx > startidx
375 and endidx > startidx
399 and length(revs[endidx - 1]) == 0):
376 and length(revs[endidx - 1]) == 0):
400 endidx -= 1
377 endidx -= 1
401
378
402 return revs[startidx:endidx]
379 return revs[startidx:endidx]
403
380
404 def segmentspan(revlog, revs, deltainfo=None):
381 def segmentspan(revlog, revs, deltainfo=None):
405 """Get the byte span of a segment of revisions
382 """Get the byte span of a segment of revisions
406
383
407 revs is a sorted array of revision numbers
384 revs is a sorted array of revision numbers
408
385
409 >>> revlog = _testrevlog([
386 >>> revlog = _testrevlog([
410 ... 5, #0
387 ... 5, #0
411 ... 10, #1
388 ... 10, #1
412 ... 12, #2
389 ... 12, #2
413 ... 12, #3 (empty)
390 ... 12, #3 (empty)
414 ... 17, #4
391 ... 17, #4
415 ... ])
392 ... ])
416
393
417 >>> segmentspan(revlog, [0, 1, 2, 3, 4])
394 >>> segmentspan(revlog, [0, 1, 2, 3, 4])
418 17
395 17
419 >>> segmentspan(revlog, [0, 4])
396 >>> segmentspan(revlog, [0, 4])
420 17
397 17
421 >>> segmentspan(revlog, [3, 4])
398 >>> segmentspan(revlog, [3, 4])
422 5
399 5
423 >>> segmentspan(revlog, [1, 2, 3,])
400 >>> segmentspan(revlog, [1, 2, 3,])
424 7
401 7
425 >>> segmentspan(revlog, [1, 3])
402 >>> segmentspan(revlog, [1, 3])
426 7
403 7
427 """
404 """
428 if not revs:
405 if not revs:
429 return 0
406 return 0
430 if deltainfo is not None and len(revlog) <= revs[-1]:
407 if deltainfo is not None and len(revlog) <= revs[-1]:
431 if len(revs) == 1:
408 if len(revs) == 1:
432 return deltainfo.deltalen
409 return deltainfo.deltalen
433 offset = revlog.end(len(revlog) - 1)
410 offset = revlog.end(len(revlog) - 1)
434 end = deltainfo.deltalen + offset
411 end = deltainfo.deltalen + offset
435 else:
412 else:
436 end = revlog.end(revs[-1])
413 end = revlog.end(revs[-1])
437 return end - revlog.start(revs[0])
414 return end - revlog.start(revs[0])
438
415
439 def _textfromdelta(fh, revlog, baserev, delta, p1, p2, flags, expectednode):
416 def _textfromdelta(fh, revlog, baserev, delta, p1, p2, flags, expectednode):
440 """build full text from a (base, delta) pair and other metadata"""
417 """build full text from a (base, delta) pair and other metadata"""
441 # special case deltas which replace entire base; no need to decode
418 # special case deltas which replace entire base; no need to decode
442 # base revision. this neatly avoids censored bases, which throw when
419 # base revision. this neatly avoids censored bases, which throw when
443 # they're decoded.
420 # they're decoded.
444 hlen = struct.calcsize(">lll")
421 hlen = struct.calcsize(">lll")
445 if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
422 if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
446 len(delta) - hlen):
423 len(delta) - hlen):
447 fulltext = delta[hlen:]
424 fulltext = delta[hlen:]
448 else:
425 else:
449 # deltabase is rawtext before changed by flag processors, which is
426 # deltabase is rawtext before changed by flag processors, which is
450 # equivalent to non-raw text
427 # equivalent to non-raw text
451 basetext = revlog.revision(baserev, _df=fh, raw=False)
428 basetext = revlog.revision(baserev, _df=fh, raw=False)
452 fulltext = mdiff.patch(basetext, delta)
429 fulltext = mdiff.patch(basetext, delta)
453
430
454 try:
431 try:
455 res = revlog._processflags(fulltext, flags, 'read', raw=True)
432 res = revlog._processflags(fulltext, flags, 'read', raw=True)
456 fulltext, validatehash = res
433 fulltext, validatehash = res
457 if validatehash:
434 if validatehash:
458 revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
435 revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
459 if flags & REVIDX_ISCENSORED:
436 if flags & REVIDX_ISCENSORED:
460 raise error.StorageError(_('node %s is not censored') %
437 raise error.StorageError(_('node %s is not censored') %
461 expectednode)
438 expectednode)
462 except error.CensoredNodeError:
439 except error.CensoredNodeError:
463 # must pass the censored index flag to add censored revisions
440 # must pass the censored index flag to add censored revisions
464 if not flags & REVIDX_ISCENSORED:
441 if not flags & REVIDX_ISCENSORED:
465 raise
442 raise
466 return fulltext
443 return fulltext
467
444
468 @attr.s(slots=True, frozen=True)
445 @attr.s(slots=True, frozen=True)
469 class _deltainfo(object):
446 class _deltainfo(object):
470 distance = attr.ib()
447 distance = attr.ib()
471 deltalen = attr.ib()
448 deltalen = attr.ib()
472 data = attr.ib()
449 data = attr.ib()
473 base = attr.ib()
450 base = attr.ib()
474 chainbase = attr.ib()
451 chainbase = attr.ib()
475 chainlen = attr.ib()
452 chainlen = attr.ib()
476 compresseddeltalen = attr.ib()
453 compresseddeltalen = attr.ib()
477 snapshotdepth = attr.ib()
454 snapshotdepth = attr.ib()
478
455
479 def isgooddeltainfo(revlog, deltainfo, revinfo):
456 def isgooddeltainfo(revlog, deltainfo, revinfo):
480 """Returns True if the given delta is good. Good means that it is within
457 """Returns True if the given delta is good. Good means that it is within
481 the disk span, disk size, and chain length bounds that we know to be
458 the disk span, disk size, and chain length bounds that we know to be
482 performant."""
459 performant."""
483 if deltainfo is None:
460 if deltainfo is None:
484 return False
461 return False
485
462
486 # - 'deltainfo.distance' is the distance from the base revision --
463 # - 'deltainfo.distance' is the distance from the base revision --
487 # bounding it limits the amount of I/O we need to do.
464 # bounding it limits the amount of I/O we need to do.
488 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
465 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
489 # deltas we need to apply -- bounding it limits the amount of CPU
466 # deltas we need to apply -- bounding it limits the amount of CPU
490 # we consume.
467 # we consume.
491
468
492 textlen = revinfo.textlen
469 textlen = revinfo.textlen
493 defaultmax = textlen * 4
470 defaultmax = textlen * 4
494 maxdist = revlog._maxdeltachainspan
471 maxdist = revlog._maxdeltachainspan
495 if not maxdist:
472 if not maxdist:
496 maxdist = deltainfo.distance # ensure the conditional pass
473 maxdist = deltainfo.distance # ensure the conditional pass
497 maxdist = max(maxdist, defaultmax)
474 maxdist = max(maxdist, defaultmax)
498
475
499 # Bad delta from read span:
476 # Bad delta from read span:
500 #
477 #
501 # If the span of data read is larger than the maximum allowed.
478 # If the span of data read is larger than the maximum allowed.
502 #
479 #
503 # In the sparse-revlog case, we rely on the associated "sparse reading"
480 # In the sparse-revlog case, we rely on the associated "sparse reading"
504 # to avoid issue related to the span of data. In theory, it would be
481 # to avoid issue related to the span of data. In theory, it would be
505 # possible to build pathological revlog where delta pattern would lead
482 # possible to build pathological revlog where delta pattern would lead
506 # to too many reads. However, they do not happen in practice at all. So
483 # to too many reads. However, they do not happen in practice at all. So
507 # we skip the span check entirely.
484 # we skip the span check entirely.
508 if not revlog._sparserevlog and maxdist < deltainfo.distance:
485 if not revlog._sparserevlog and maxdist < deltainfo.distance:
509 return False
486 return False
510
487
511 # Bad delta from new delta size:
488 # Bad delta from new delta size:
512 #
489 #
513 # If the delta size is larger than the target text, storing the
490 # If the delta size is larger than the target text, storing the
514 # delta will be inefficient.
491 # delta will be inefficient.
515 if textlen < deltainfo.deltalen:
492 if textlen < deltainfo.deltalen:
516 return False
493 return False
517
494
518 # Bad delta from cumulated payload size:
495 # Bad delta from cumulated payload size:
519 #
496 #
520 # If the sum of delta get larger than K * target text length.
497 # If the sum of delta get larger than K * target text length.
521 if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
498 if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
522 return False
499 return False
523
500
524 # Bad delta from chain length:
501 # Bad delta from chain length:
525 #
502 #
526 # If the number of delta in the chain gets too high.
503 # If the number of delta in the chain gets too high.
527 if (revlog._maxchainlen
504 if (revlog._maxchainlen
528 and revlog._maxchainlen < deltainfo.chainlen):
505 and revlog._maxchainlen < deltainfo.chainlen):
529 return False
506 return False
530
507
531 # bad delta from intermediate snapshot size limit
508 # bad delta from intermediate snapshot size limit
532 #
509 #
533 # If an intermediate snapshot size is higher than the limit. The
510 # If an intermediate snapshot size is higher than the limit. The
534 # limit exist to prevent endless chain of intermediate delta to be
511 # limit exist to prevent endless chain of intermediate delta to be
535 # created.
512 # created.
536 if (deltainfo.snapshotdepth is not None and
513 if (deltainfo.snapshotdepth is not None and
537 (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen):
514 (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen):
538 return False
515 return False
539
516
540 # bad delta if new intermediate snapshot is larger than the previous
517 # bad delta if new intermediate snapshot is larger than the previous
541 # snapshot
518 # snapshot
542 if (deltainfo.snapshotdepth
519 if (deltainfo.snapshotdepth
543 and revlog.length(deltainfo.base) < deltainfo.deltalen):
520 and revlog.length(deltainfo.base) < deltainfo.deltalen):
544 return False
521 return False
545
522
546 return True
523 return True
547
524
def _candidategroups(revlog, textlen, p1, p2, cachedelta):
    """Provides group of revision to be tested as delta base

    This top level function focus on emitting groups with unique and worthwhile
    content. See _raw_candidate_groups for details about the group order.

    This is a generator using the "send" protocol: the caller iterates over
    the yielded candidate groups and sends back the revision it eventually
    picked as a good base (or None). That feedback is forwarded to the
    underlying _refinedgroups generator so it can refine later suggestions.
    """
    # should we try to build a delta?
    if not (len(revlog) and revlog._storedeltachains):
        yield None
        return

    deltalength = revlog.length
    deltaparent = revlog.deltaparent
    good = None

    # a compressed delta chain heavier than this can never qualify as a
    # "good" delta (cf. LIMIT_DELTA2TEXT check in isgooddeltainfo)
    deltas_limit = textlen * LIMIT_DELTA2TEXT

    # revisions already tested (or skipped); nullrev is pre-seeded because a
    # delta against it is only attempted as a last resort (full snapshot)
    tested = set([nullrev])
    candidates = _refinedgroups(revlog, p1, p2, cachedelta)
    while True:
        temptative = candidates.send(good)
        if temptative is None:
            break
        group = []
        for rev in temptative:
            # skip over empty delta (no need to include them in a chain)
            while (revlog._generaldelta
                   and not (rev == nullrev
                            or rev in tested
                            or deltalength(rev))):
                tested.add(rev)
                rev = deltaparent(rev)
            # filter out revision we tested already
            if rev in tested:
                continue
            tested.add(rev)
            # filter out delta base that will never produce good delta
            if deltas_limit < revlog.length(rev):
                continue
            # no need to try a delta against nullrev, this will be done as a
            # last resort.
            if rev == nullrev:
                continue
            # no delta for rawtext-changing revs (see "candelta" for why)
            if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
                continue
            group.append(rev)
        if group:
            # XXX: in the sparse revlog case, group can become large,
            # impacting performance. Some bounding or slicing mechanism
            # would help to reduce this impact.
            good = yield tuple(group)
    # signal exhaustion to the caller
    yield None
601
578
def _findsnapshots(revlog, cache, start_rev):
    """Collect every snapshot revision from start_rev up to the tip.

    Each snapshot found is appended to ``cache`` under the key of its delta
    parent (``cache`` is expected to behave like a defaultdict(list)).
    """
    is_snapshot = revlog.issnapshot
    parent_of = revlog.deltaparent
    for candidate in revlog.revs(start_rev):
        if not is_snapshot(candidate):
            continue
        cache[parent_of(candidate)].append(candidate)
609
586
def _refinedgroups(revlog, p1, p2, cachedelta):
    """Emit delta-base candidate groups, refining them with caller feedback.

    Generator using the "send" protocol: each ``yield`` hands a group of
    candidate base revisions to the caller, which sends back the base it
    selected (or None if none of the group was suitable). On sparse revlogs
    a selected snapshot base is then refined: first downward through its
    delta parents, then upward through its snapshot children. A final
    ``yield None`` signals exhaustion.
    """
    good = None
    # First we try to reuse the delta contained in the bundle.
    # (or from the source revlog)
    #
    # This logic only applies to general delta repositories and can be disabled
    # through configuration. Disabling reuse source delta is useful when
    # we want to make sure we recomputed "optimal" deltas.
    if cachedelta and revlog._generaldelta and revlog._lazydeltabase:
        # Assume what we received from the server is a good choice
        # build delta will reuse the cache
        good = yield (cachedelta[0],)
        if good is not None:
            yield None
            return
    for candidates in _rawgroups(revlog, p1, p2, cachedelta):
        good = yield candidates
        if good is not None:
            break

    # If sparse revlog is enabled, we can try to refine the available deltas
    if not revlog._sparserevlog:
        yield None
        return

    # if we have a refinable value, try to refine it
    if good is not None and good not in (p1, p2) and revlog.issnapshot(good):
        # refine snapshot down: walk the delta-parent chain of the selected
        # snapshot, suggesting each ancestor base in turn until the caller
        # stops improving (previous == good) or we reach a full snapshot
        previous = None
        while previous != good:
            previous = good
            base = revlog.deltaparent(good)
            if base == nullrev:
                break
            good = yield (base,)
        # refine snapshot up
        #
        # XXX the _findsnapshots call can be expensive and is "duplicated" with
        # the one done in `_rawgroups`. Once we start working on performance,
        # we should make the two logics share this computation.
        snapshots = collections.defaultdict(list)
        _findsnapshots(revlog, snapshots, good + 1)
        previous = None
        while good != previous:
            previous = good
            # suggest the snapshot children of the current best base
            children = tuple(sorted(c for c in snapshots[good]))
            good = yield children

    # we have found nothing
    yield None
660
637
def _rawgroups(revlog, p1, p2, cachedelta):
    """Provides group of revision to be tested as delta base

    This lower level function focuses on emitting theoretically interesting
    delta bases, without looking at any practical details.

    The group order aims at providing fast or small candidates first.
    """
    gdelta = revlog._generaldelta
    sparse = revlog._sparserevlog
    curr = len(revlog)
    prev = curr - 1
    deltachain = lambda rev: revlog._deltachain(rev)[0]

    if gdelta:
        # exclude already lazy tested base if any
        parents = [p for p in (p1, p2) if p != nullrev]

        if not revlog._deltabothparents and len(parents) == 2:
            parents.sort()
            # To minimize the chance of having to build a fulltext,
            # pick first whichever parent is closest to us (max rev)
            yield (parents[1],)
            # then the other one (min rev) if the first did not fit
            yield (parents[0],)
        elif len(parents) > 0:
            # Test all parents (1 or 2), and keep the best candidate
            yield parents

    if sparse and parents:
        snapshots = collections.defaultdict(list) # map: base-rev: snapshot-rev
        # See if we can use an existing snapshot in the parent chains to use as
        # a base for a new intermediate-snapshot
        #
        # search for snapshot in parents delta chain
        # map: snapshot-level: snapshot-rev
        parents_snaps = collections.defaultdict(set)
        candidate_chains = [deltachain(p) for p in parents]
        for chain in candidate_chains:
            for idx, s in enumerate(chain):
                if not revlog.issnapshot(s):
                    break
                parents_snaps[idx].add(s)
        # first revision after the oldest full snapshot used by a parent
        # chain; only snapshots from that point on are worth considering
        snapfloor = min(parents_snaps[0]) + 1
        _findsnapshots(revlog, snapshots, snapfloor)
        # search for the highest "unrelated" revision
        #
        # Adding snapshots used by "unrelated" revision increase the odds we
        # reuse an independent, yet better snapshot chain.
        #
        # XXX instead of building a set of revisions, we could lazily enumerate
        # over the chains. That would be more efficient, however we stick to
        # simple code for now.
        all_revs = set()
        for chain in candidate_chains:
            all_revs.update(chain)
        other = None
        for r in revlog.revs(prev, snapfloor):
            if r not in all_revs:
                other = r
                break
        if other is not None:
            # To avoid unfair competition, we won't use unrelated intermediate
            # snapshot that are deeper than the ones from the parent delta
            # chain.
            max_depth = max(parents_snaps.keys())
            chain = deltachain(other)
            for idx, s in enumerate(chain):
                if s < snapfloor:
                    continue
                if max_depth < idx:
                    break
                if not revlog.issnapshot(s):
                    break
                parents_snaps[idx].add(s)
        # Test them as possible intermediate snapshot base
        # We test them from highest to lowest level. High level one are more
        # likely to result in small delta
        floor = None
        for idx, snaps in sorted(parents_snaps.items(), reverse=True):
            siblings = set()
            for s in snaps:
                siblings.update(snapshots[s])
            # Before considering making a new intermediate snapshot, we check
            # if an existing snapshot, children of base we consider, would be
            # suitable.
            #
            # It gives a chance to reuse a delta chain "unrelated" to the
            # current revision instead of starting our own. Without such
            # re-use, topological branches would keep reopening new chains.
            # Creating more and more snapshot as the repository grow.

            if floor is not None:
                # We only do this for siblings created after the one in our
                # parent's delta chain. Those created before has less chances
                # to be valid base since our ancestors had to create a new
                # snapshot.
                siblings = [r for r in siblings if floor < r]
            yield tuple(sorted(siblings))
            # then test the base from our parent's delta chain.
            yield tuple(sorted(snaps))
            floor = min(snaps)
        # No suitable base found in the parent chain, search if any full
        # snapshots emitted since parent's base would be a suitable base for an
        # intermediate snapshot.
        #
        # It gives a chance to reuse a delta chain unrelated to the current
        # revisions instead of starting our own. Without such re-use,
        # topological branches would keep reopening new full chains. Creating
        # more and more snapshot as the repository grow.
        yield tuple(snapshots[nullrev])

    if not sparse:
        # other approach failed try against prev to hopefully save us a
        # fulltext.
        yield (prev,)
777
754
class deltacomputer(object):
    """Compute and select deltas to be stored in a revlog."""

    def __init__(self, revlog):
        self.revlog = revlog

    def buildtext(self, revinfo, fh):
        """Builds a fulltext version of a revision

        revinfo: _revisioninfo instance that contains all needed info
        fh: file handle to either the .i or the .d revlog file,
            depending on whether it is inlined or not
        """
        btext = revinfo.btext
        # the fulltext is cached in a one-element list; reuse it if present
        if btext[0] is not None:
            return btext[0]

        revlog = self.revlog
        cachedelta = revinfo.cachedelta
        baserev = cachedelta[0]
        delta = cachedelta[1]

        fulltext = btext[0] = _textfromdelta(fh, revlog, baserev, delta,
                                             revinfo.p1, revinfo.p2,
                                             revinfo.flags, revinfo.node)
        return fulltext

    def _builddeltadiff(self, base, revinfo, fh):
        """Return the uncompressed delta text of revinfo against rev `base`."""
        revlog = self.revlog
        t = self.buildtext(revinfo, fh)
        if revlog.iscensored(base):
            # deltas based on a censored revision must replace the
            # full content in one patch, so delta works everywhere
            header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
            delta = header + t
        else:
            ptext = revlog.revision(base, _df=fh, raw=True)
            delta = mdiff.textdiff(ptext, t)

        return delta

    def _builddeltainfo(self, revinfo, base, fh):
        """Build a _deltainfo describing a delta of revinfo against `base`."""
        # can we use the cached delta?
        delta = None
        if revinfo.cachedelta:
            cachebase, cachediff = revinfo.cachedelta
            #check if the diff still apply
            # (skip over empty deltas between the cached base and `base`)
            currentbase = cachebase
            while (currentbase != nullrev
                   and currentbase != base
                   and self.revlog.length(currentbase) == 0):
                currentbase = self.revlog.deltaparent(currentbase)
            if currentbase == base:
                delta = revinfo.cachedelta[1]
        if delta is None:
            delta = self._builddeltadiff(base, revinfo, fh)
        revlog = self.revlog
        header, data = revlog.compress(delta)
        deltalen = len(header) + len(data)
        chainbase = revlog.chainbase(base)
        offset = revlog.end(len(revlog) - 1)
        # distance from the start of the delta chain to the end of this delta
        dist = deltalen + offset - revlog.start(chainbase)
        if revlog._generaldelta:
            deltabase = base
        else:
            deltabase = chainbase
        chainlen, compresseddeltalen = revlog._chaininfo(base)
        chainlen += 1
        compresseddeltalen += deltalen

        # NOTE(review): 'revlog' was already bound above; this rebinding is
        # redundant but harmless.
        revlog = self.revlog
        snapshotdepth = None
        if deltabase == nullrev:
            snapshotdepth = 0
        elif revlog._sparserevlog and revlog.issnapshot(deltabase):
            # A delta chain should always be one full snapshot,
            # zero or more semi-snapshots, and zero or more deltas
            p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
            if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
                snapshotdepth = len(revlog._deltachain(deltabase)[0])

        return _deltainfo(dist, deltalen, (header, data), deltabase,
                          chainbase, chainlen, compresseddeltalen,
                          snapshotdepth)

    def _fullsnapshotinfo(self, fh, revinfo):
        """Build the _deltainfo for storing revinfo as a full snapshot."""
        curr = len(self.revlog)
        rawtext = self.buildtext(revinfo, fh)
        data = self.revlog.compress(rawtext)
        compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
        deltabase = chainbase = curr
        snapshotdepth = 0
        chainlen = 1

        return _deltainfo(dist, deltalen, data, deltabase,
                          chainbase, chainlen, compresseddeltalen,
                          snapshotdepth)

    def finddeltainfo(self, revinfo, fh):
        """Find an acceptable delta against a candidate revision

        revinfo: information about the revision (instance of _revisioninfo)
        fh: file handle to either the .i or the .d revlog file,
            depending on whether it is inlined or not

        Returns the first acceptable candidate revision, as ordered by
        _candidategroups

        If no suitable deltabase is found, we return delta info for a full
        snapshot.
        """
        if not revinfo.textlen:
            return self._fullsnapshotinfo(fh, revinfo)

        # no delta for flag processor revision (see "candelta" for why)
        # not calling candelta since only one revision needs test, also to
        # avoid overhead fetching flags again.
        if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
            return self._fullsnapshotinfo(fh, revinfo)

        cachedelta = revinfo.cachedelta
        p1 = revinfo.p1
        p2 = revinfo.p2
        revlog = self.revlog

        deltainfo = None
        p1r, p2r = revlog.rev(p1), revlog.rev(p2)
        groups = _candidategroups(self.revlog, revinfo.textlen,
                                  p1r, p2r, cachedelta)
        candidaterevs = next(groups)
        while candidaterevs is not None:
            nominateddeltas = []
            if deltainfo is not None:
                # if we already found a good delta,
                # challenge it against refined candidates
                nominateddeltas.append(deltainfo)
            for candidaterev in candidaterevs:
                candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
                if isgooddeltainfo(self.revlog, candidatedelta, revinfo):
                    nominateddeltas.append(candidatedelta)
            if nominateddeltas:
                # keep the smallest acceptable delta found so far
                deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
            if deltainfo is not None:
                # feed the chosen base back so the generator can refine
                candidaterevs = groups.send(deltainfo.base)
            else:
                candidaterevs = next(groups)

        if deltainfo is None:
            deltainfo = self._fullsnapshotinfo(fh, revinfo)
        return deltainfo
General Comments 0
You need to be logged in to leave comments. Login now