##// END OF EJS Templates
sparse-revlog: drop unused deltainfo parameter from segmentspan...
Boris Feld -
r40641:a32ccd32 default
parent child Browse files
Show More
@@ -1,902 +1,896 b''
1 # revlogdeltas.py - Logic around delta computation for revlog
1 # revlogdeltas.py - Logic around delta computation for revlog
2 #
2 #
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 # Copyright 2018 Octobus <contact@octobus.net>
4 # Copyright 2018 Octobus <contact@octobus.net>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8 """Helper class to compute deltas stored inside revlogs"""
8 """Helper class to compute deltas stored inside revlogs"""
9
9
10 from __future__ import absolute_import
10 from __future__ import absolute_import
11
11
12 import collections
12 import collections
13 import heapq
13 import heapq
14 import struct
14 import struct
15
15
16 # import stuff from node for others to import from revlog
16 # import stuff from node for others to import from revlog
17 from ..node import (
17 from ..node import (
18 nullrev,
18 nullrev,
19 )
19 )
20 from ..i18n import _
20 from ..i18n import _
21
21
22 from .constants import (
22 from .constants import (
23 REVIDX_ISCENSORED,
23 REVIDX_ISCENSORED,
24 REVIDX_RAWTEXT_CHANGING_FLAGS,
24 REVIDX_RAWTEXT_CHANGING_FLAGS,
25 )
25 )
26
26
27 from ..thirdparty import (
27 from ..thirdparty import (
28 attr,
28 attr,
29 )
29 )
30
30
31 from .. import (
31 from .. import (
32 error,
32 error,
33 mdiff,
33 mdiff,
34 )
34 )
35
35
36 # maximum <delta-chain-data>/<revision-text-length> ratio
36 # maximum <delta-chain-data>/<revision-text-length> ratio
37 LIMIT_DELTA2TEXT = 2
37 LIMIT_DELTA2TEXT = 2
38
38
39 class _testrevlog(object):
39 class _testrevlog(object):
40 """minimalist fake revlog to use in doctests"""
40 """minimalist fake revlog to use in doctests"""
41
41
42 def __init__(self, data, density=0.5, mingap=0):
42 def __init__(self, data, density=0.5, mingap=0):
43 """data is an list of revision payload boundaries"""
43 """data is an list of revision payload boundaries"""
44 self._data = data
44 self._data = data
45 self._srdensitythreshold = density
45 self._srdensitythreshold = density
46 self._srmingapsize = mingap
46 self._srmingapsize = mingap
47
47
48 def start(self, rev):
48 def start(self, rev):
49 if rev == 0:
49 if rev == 0:
50 return 0
50 return 0
51 return self._data[rev - 1]
51 return self._data[rev - 1]
52
52
53 def end(self, rev):
53 def end(self, rev):
54 return self._data[rev]
54 return self._data[rev]
55
55
56 def length(self, rev):
56 def length(self, rev):
57 return self.end(rev) - self.start(rev)
57 return self.end(rev) - self.start(rev)
58
58
59 def __len__(self):
59 def __len__(self):
60 return len(self._data)
60 return len(self._data)
61
61
def slicechunk(revlog, revs, targetsize=None):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `revlog._srdensitythreshold`. No gap smaller than
    `revlog._srmingapsize` is skipped.

    If `targetsize` is set, no chunk larger than `targetsize` will be yield.
    For consistency with other slicing choice, this limit won't go lower than
    `revlog._srmingapsize`.

    If individual revisions chunk are larger than this limit, they will still
    be raised individually.

    >>> revlog = _testrevlog([
    ...  5, 10, 12, 12, 27, 31, 31, 42, 47, 47, 48, 51, 74, 85, 86, 91])
    >>> list(slicechunk(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]

    Slicing with a maximum chunk size
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
    [[0], [11], [13], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
    [[0], [11], [13, 15]]
    """
    if targetsize is not None:
        targetsize = max(targetsize, revlog._srmingapsize)
    # targetsize should not be specified when evaluating delta candidates:
    # * targetsize is used to ensure we stay within specification when reading,
    densitygroups = _slicechunktodensity(revlog, revs,
                                         revlog._srdensitythreshold,
                                         revlog._srmingapsize)
    for densitychunk in densitygroups:
        for sizedchunk in _slicechunktosize(revlog, densitychunk, targetsize):
            yield sizedchunk
124
124
def _slicechunktosize(revlog, revs, targetsize=None):
    """slice revs to match the target size

    This is intended to be used on chunk that density slicing selected by that
    are still too large compared to the read garantee of revlog. This might
    happens when "minimal gap size" interrupted the slicing or when chain are
    built in a way that create large blocks next to each other.

    >>> revlog = _testrevlog([3, 5, 6, 8, 8, 11, 12, 13, 14])

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1, 2], [3]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]
    """
    assert targetsize is None or 0 <= targetsize
    # small enough already (or no limit): emit the whole group unchanged
    if targetsize is None or segmentspan(revlog, revs) <= targetsize:
        yield revs
        return

    groupfirstidx = 0
    groupstartdata = revlog.start(revs[0])
    grouplastidx = 0
    walker = enumerate(revs)
    next(walker)  # skip first rev.
    for idx, rev in walker:
        span = revlog.end(rev) - groupstartdata
        if span <= targetsize:
            # still fits: extend the current group
            grouplastidx = idx
        else:
            # flush the current group (trimmed of trailing empty revs)
            # and start a new one at this revision
            pending = _trimchunk(revlog, revs, groupfirstidx, grouplastidx + 1)
            if pending:
                yield pending
            groupfirstidx = idx
            groupstartdata = revlog.start(rev)
            grouplastidx = idx
    yield _trimchunk(revlog, revs, groupfirstidx)
201
201
def _slicechunktodensity(revlog, revs, targetdensity=0.5,
                         mingapsize=0):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
    skipped.

    >>> revlog = _testrevlog([
    ...  5, 10, 12, 12, 27, 31, 31, 42, 47, 47, 48, 51, 74, 85, 86, 91])
    >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95, mingapsize=12))
    [[1, 2], [5, 8, 10, 11], [14]]
    """
    start = revlog.start
    length = revlog.length

    # nothing to slice for zero or one revision
    if len(revs) <= 1:
        yield revs
        return

    deltachainspan = segmentspan(revlog, revs)
    chainpayload = sum(length(r) for r in revs)

    # span already below the minimal gap size: keep it whole
    if deltachainspan < mingapsize:
        yield revs
        return

    readdata = deltachainspan
    if deltachainspan:
        density = chainpayload / float(deltachainspan)
    else:
        density = 1.0

    # dense enough as-is
    if density >= targetdensity:
        yield revs
        return

    # Store the gaps in a heap to have them sorted by decreasing size
    # (sizes are pushed negated so the min-heap pops the largest first)
    gapsheap = []
    prevend = None
    for i, rev in enumerate(revs):
        revstart = start(rev)
        revlen = length(rev)

        # Skip empty revisions to form larger holes
        if revlen == 0:
            continue

        if prevend is not None:
            gapsize = revstart - prevend
            # only consider holes that are large enough
            if gapsize > mingapsize:
                heapq.heappush(gapsheap, (-gapsize, i))

        prevend = revstart + revlen

    # Collect the indices of the largest holes until the density is acceptable
    indicesheap = []
    while gapsheap and density < targetdensity:
        oppgapsize, gapidx = heapq.heappop(gapsheap)

        heapq.heappush(indicesheap, gapidx)

        # the gap sizes are stored as negatives to be sorted decreasingly
        # by the heap
        readdata -= (-oppgapsize)
        if readdata > 0:
            density = chainpayload / float(readdata)
        else:
            density = 1.0

    # Cut the revs at collected indices
    previdx = 0
    while indicesheap:
        idx = heapq.heappop(indicesheap)

        piece = _trimchunk(revlog, revs, previdx, idx)
        if piece:
            yield piece

        previdx = idx

    piece = _trimchunk(revlog, revs, previdx)
    if piece:
        yield piece
327
327
328 def _trimchunk(revlog, revs, startidx, endidx=None):
328 def _trimchunk(revlog, revs, startidx, endidx=None):
329 """returns revs[startidx:endidx] without empty trailing revs
329 """returns revs[startidx:endidx] without empty trailing revs
330
330
331 Doctest Setup
331 Doctest Setup
332 >>> revlog = _testrevlog([
332 >>> revlog = _testrevlog([
333 ... 5, #0
333 ... 5, #0
334 ... 10, #1
334 ... 10, #1
335 ... 12, #2
335 ... 12, #2
336 ... 12, #3 (empty)
336 ... 12, #3 (empty)
337 ... 17, #4
337 ... 17, #4
338 ... 21, #5
338 ... 21, #5
339 ... 21, #6 (empty)
339 ... 21, #6 (empty)
340 ... ])
340 ... ])
341
341
342 Contiguous cases:
342 Contiguous cases:
343 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
343 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
344 [0, 1, 2, 3, 4, 5]
344 [0, 1, 2, 3, 4, 5]
345 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
345 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
346 [0, 1, 2, 3, 4]
346 [0, 1, 2, 3, 4]
347 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
347 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
348 [0, 1, 2]
348 [0, 1, 2]
349 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
349 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
350 [2]
350 [2]
351 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
351 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
352 [3, 4, 5]
352 [3, 4, 5]
353 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
353 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
354 [3, 4]
354 [3, 4]
355
355
356 Discontiguous cases:
356 Discontiguous cases:
357 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
357 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
358 [1, 3, 5]
358 [1, 3, 5]
359 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
359 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
360 [1]
360 [1]
361 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
361 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
362 [3, 5]
362 [3, 5]
363 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
363 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
364 [3, 5]
364 [3, 5]
365 """
365 """
366 length = revlog.length
366 length = revlog.length
367
367
368 if endidx is None:
368 if endidx is None:
369 endidx = len(revs)
369 endidx = len(revs)
370
370
371 # If we have a non-emtpy delta candidate, there are nothing to trim
371 # If we have a non-emtpy delta candidate, there are nothing to trim
372 if revs[endidx - 1] < len(revlog):
372 if revs[endidx - 1] < len(revlog):
373 # Trim empty revs at the end, except the very first revision of a chain
373 # Trim empty revs at the end, except the very first revision of a chain
374 while (endidx > 1
374 while (endidx > 1
375 and endidx > startidx
375 and endidx > startidx
376 and length(revs[endidx - 1]) == 0):
376 and length(revs[endidx - 1]) == 0):
377 endidx -= 1
377 endidx -= 1
378
378
379 return revs[startidx:endidx]
379 return revs[startidx:endidx]
380
380
def segmentspan(revlog, revs):
    """Get the byte span of a segment of revisions

    revs is a sorted array of revision numbers

    Returns the number of bytes between the start of the first revision
    and the end of the last one; gaps and empty revisions in between are
    included in the span.

    >>> revlog = _testrevlog([
    ...  5, #0
    ...  10, #1
    ...  12, #2
    ...  12, #3 (empty)
    ...  17, #4
    ... ])

    >>> segmentspan(revlog, [0, 1, 2, 3, 4])
    17
    >>> segmentspan(revlog, [0, 4])
    17
    >>> segmentspan(revlog, [3, 4])
    5
    >>> segmentspan(revlog, [1, 2, 3,])
    7
    >>> segmentspan(revlog, [1, 3])
    7
    """
    # empty segment spans no bytes
    if not revs:
        return 0
    # The previous `deltainfo=None` parameter was unused by all callers;
    # it is dropped so only the stored-revision span remains.
    end = revlog.end(revs[-1])
    return end - revlog.start(revs[0])
def _textfromdelta(fh, revlog, baserev, delta, p1, p2, flags, expectednode):
    """build full text from a (base, delta) pair and other metadata"""
    # special case deltas which replace entire base; no need to decode
    # base revision. this neatly avoids censored bases, which throw when
    # they're decoded.
    hlen = struct.calcsize(">lll")
    fullreplacement = mdiff.replacediffheader(revlog.rawsize(baserev),
                                              len(delta) - hlen)
    if delta[:hlen] == fullreplacement:
        fulltext = delta[hlen:]
    else:
        # deltabase is rawtext before changed by flag processors, which is
        # equivalent to non-raw text
        basetext = revlog.revision(baserev, _df=fh, raw=False)
        fulltext = mdiff.patch(basetext, delta)

    try:
        fulltext, validatehash = revlog._processflags(fulltext, flags, 'read',
                                                      raw=True)
        if validatehash:
            revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
        if flags & REVIDX_ISCENSORED:
            raise error.StorageError(_('node %s is not censored') %
                                     expectednode)
    except error.CensoredNodeError:
        # must pass the censored index flag to add censored revisions
        if not flags & REVIDX_ISCENSORED:
            raise
    return fulltext
444
438
@attr.s(slots=True, frozen=True)
class _deltainfo(object):
    # distance from the delta base revision (read-span bound)
    distance = attr.ib()
    # size of the delta itself
    deltalen = attr.ib()
    # the delta payload
    data = attr.ib()
    # revision used as the delta base
    base = attr.ib()
    # first revision of the delta chain
    chainbase = attr.ib()
    # number of deltas in the chain
    chainlen = attr.ib()
    # cumulated compressed size of all deltas in the chain
    compresseddeltalen = attr.ib()
    # depth if this is an intermediate snapshot, None otherwise
    snapshotdepth = attr.ib()
455
449
def isgooddeltainfo(revlog, deltainfo, revinfo):
    """Returns True if the given delta is good. Good means that it is within
    the disk span, disk size, and chain length bounds that we know to be
    performant."""
    if deltainfo is None:
        return False

    # - 'deltainfo.distance' is the distance from the base revision --
    #   bounding it limits the amount of I/O we need to do.
    # - 'deltainfo.compresseddeltalen' is the sum of the total size of
    #   deltas we need to apply -- bounding it limits the amount of CPU
    #   we consume.

    textlen = revinfo.textlen
    defaultmax = textlen * 4
    maxdist = revlog._maxdeltachainspan
    if not maxdist:
        maxdist = deltainfo.distance  # ensure the conditional pass
    maxdist = max(maxdist, defaultmax)

    # Bad delta from read span:
    #
    # If the span of data read is larger than the maximum allowed.
    #
    # In the sparse-revlog case, we rely on the associated "sparse reading"
    # to avoid issue related to the span of data. In theory, it would be
    # possible to build pathological revlog where delta pattern would lead
    # to too many reads. However, they do not happen in practice at all. So
    # we skip the span check entirely.
    if not revlog._sparserevlog and maxdist < deltainfo.distance:
        return False

    # Bad delta from new delta size:
    #
    # If the delta size is larger than the target text, storing the
    # delta will be inefficient.
    if textlen < deltainfo.deltalen:
        return False

    # Bad delta from cumulated payload size:
    #
    # If the sum of delta get larger than K * target text length.
    if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
        return False

    # Bad delta from chain length:
    #
    # If the number of delta in the chain gets too high.
    if revlog._maxchainlen and revlog._maxchainlen < deltainfo.chainlen:
        return False

    # bad delta from intermediate snapshot size limit
    #
    # If an intermediate snapshot size is higher than the limit. The
    # limit exist to prevent endless chain of intermediate delta to be
    # created.
    if (deltainfo.snapshotdepth is not None and
            (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen):
        return False

    # bad delta if new intermediate snapshot is larger than the previous
    # snapshot
    if (deltainfo.snapshotdepth
            and revlog.length(deltainfo.base) < deltainfo.deltalen):
        return False

    return True
524
518
def _candidategroups(revlog, textlen, p1, p2, cachedelta):
    """Provides groups of revisions to be tested as delta base.

    This top level function focuses on emitting groups with unique and
    worthwhile content. See _rawgroups for details about the group order.

    This is a generator driven through ``send()``: after each yielded group,
    the caller sends back the revision it accepted as a good base (or None),
    which steers refinement. A final ``yield None`` signals exhaustion.
    """
    # should we try to build a delta?
    if not (len(revlog) and revlog._storedeltachains):
        yield None
        return

    deltalength = revlog.length
    deltaparent = revlog.deltaparent
    good = None

    # maximum total delta-chain data we accept for a text of this size
    deltas_limit = textlen * LIMIT_DELTA2TEXT

    tested = {nullrev}
    candidates = _refinedgroups(revlog, p1, p2, cachedelta)
    while True:
        temptative = candidates.send(good)
        if temptative is None:
            break
        group = []
        for rev in temptative:
            # skip over empty delta (no need to include them in a chain)
            while (revlog._generaldelta
                   and not (rev == nullrev
                            or rev in tested
                            or deltalength(rev))):
                tested.add(rev)
                rev = deltaparent(rev)
            # filter out revision we tested already
            if rev in tested:
                continue
            tested.add(rev)
            # filter out delta base that will never produce good delta
            if deltas_limit < deltalength(rev):
                continue
            # no need to try a delta against nullrev, this will be done as a
            # last resort.
            if rev == nullrev:
                continue
            # no delta for rawtext-changing revs (see "candelta" for why)
            if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
                continue
            group.append(rev)
        if group:
            # XXX: in the sparse revlog case, group can become large,
            # impacting performances. Some bounding or slicing mechanism
            # would help to reduce this impact.
            good = yield tuple(group)
    yield None
578
572
579 def _findsnapshots(revlog, cache, start_rev):
573 def _findsnapshots(revlog, cache, start_rev):
580 """find snapshot from start_rev to tip"""
574 """find snapshot from start_rev to tip"""
581 deltaparent = revlog.deltaparent
575 deltaparent = revlog.deltaparent
582 issnapshot = revlog.issnapshot
576 issnapshot = revlog.issnapshot
583 for rev in revlog.revs(start_rev):
577 for rev in revlog.revs(start_rev):
584 if issnapshot(rev):
578 if issnapshot(rev):
585 cache[deltaparent(rev)].append(rev)
579 cache[deltaparent(rev)].append(rev)
586
580
def _refinedgroups(revlog, p1, p2, cachedelta):
    """Top-level candidate generator, refining groups from feedback.

    A ``send()``-driven generator: each ``good = yield group`` receives the
    revision the caller accepted from that group (or None). It first tries
    the cached delta and the raw groups from _rawgroups; on sparse revlogs
    it then refines an accepted snapshot base down (towards its bases) and
    up (towards its snapshot children). Ends with ``yield None``.
    """
    good = None
    # First we try to reuse the delta contained in the bundle.
    # (or from the source revlog)
    #
    # This logic only applies to general delta repositories and can be disabled
    # through configuration. Disabling reuse of a source delta is useful when
    # we want to make sure we recomputed "optimal" deltas.
    if cachedelta and revlog._generaldelta and revlog._lazydeltabase:
        # Assume what we received from the server is a good choice
        # build delta will reuse the cache
        good = yield (cachedelta[0],)
        if good is not None:
            yield None
            return
    for candidates in _rawgroups(revlog, p1, p2, cachedelta):
        good = yield candidates
        if good is not None:
            break

    # If sparse revlog is enabled, we can try to refine the available deltas
    if not revlog._sparserevlog:
        yield None
        return

    # if we have a refinable value, try to refine it
    if good is not None and good not in (p1, p2) and revlog.issnapshot(good):
        # refine snapshot down: walk the accepted snapshot's delta parents,
        # one at a time, as long as the caller keeps accepting them
        previous = None
        while previous != good:
            previous = good
            base = revlog.deltaparent(good)
            if base == nullrev:
                break
            good = yield (base,)
        # refine snapshot up: offer the snapshot children of the accepted base
        #
        # XXX the _findsnapshots call can be expensive and is "duplicated" with
        # the one done in `_rawgroups`. Once we start working on performance,
        # we should make the two logics share this computation.
        snapshots = collections.defaultdict(list)
        _findsnapshots(revlog, snapshots, good + 1)
        previous = None
        while good != previous:
            previous = good
            children = tuple(sorted(c for c in snapshots[good]))
            good = yield children

    # we have found nothing
    yield None
637
631
def _rawgroups(revlog, p1, p2, cachedelta):
    """Provides groups of revisions to be tested as delta base.

    This lower level function focuses on emitting theoretically interesting
    deltas without looking at any practical details.

    The group order aims at providing fast or small candidates first.
    """
    gdelta = revlog._generaldelta
    sparse = revlog._sparserevlog
    curr = len(revlog)
    prev = curr - 1
    deltachain = lambda rev: revlog._deltachain(rev)[0]

    if gdelta:
        # exclude already lazy tested base if any
        parents = [p for p in (p1, p2) if p != nullrev]

        if not revlog._deltabothparents and len(parents) == 2:
            parents.sort()
            # To minimize the chance of having to build a fulltext,
            # pick first whichever parent is closest to us (max rev)
            yield (parents[1],)
            # then the other one (min rev) if the first did not fit
            yield (parents[0],)
        elif len(parents) > 0:
            # Test all parents (1 or 2), and keep the best candidate
            yield parents

    # NOTE(review): `parents` is only bound inside the `gdelta` branch above;
    # this assumes sparse revlogs imply general delta — confirm that invariant
    # holds for every caller.
    if sparse and parents:
        snapshots = collections.defaultdict(list) # map: base-rev: snapshot-rev
        # See if we can use an existing snapshot in the parent chains to use as
        # a base for a new intermediate-snapshot
        #
        # search for snapshot in parents delta chain
        # map: snapshot-level: snapshot-rev
        parents_snaps = collections.defaultdict(set)
        candidate_chains = [deltachain(p) for p in parents]
        for chain in candidate_chains:
            for idx, s in enumerate(chain):
                if not revlog.issnapshot(s):
                    break
                parents_snaps[idx].add(s)
        snapfloor = min(parents_snaps[0]) + 1
        _findsnapshots(revlog, snapshots, snapfloor)
        # search for the highest "unrelated" revision
        #
        # Adding snapshots used by "unrelated" revisions increases the odds we
        # reuse an independent, yet better, snapshot chain.
        #
        # XXX instead of building a set of revisions, we could lazily enumerate
        # over the chains. That would be more efficient, however we stick to
        # simple code for now.
        all_revs = set()
        for chain in candidate_chains:
            all_revs.update(chain)
        other = None
        for r in revlog.revs(prev, snapfloor):
            if r not in all_revs:
                other = r
                break
        if other is not None:
            # To avoid unfair competition, we won't use unrelated intermediate
            # snapshots that are deeper than the ones from the parent delta
            # chain.
            max_depth = max(parents_snaps.keys())
            chain = deltachain(other)
            for idx, s in enumerate(chain):
                if s < snapfloor:
                    continue
                if max_depth < idx:
                    break
                if not revlog.issnapshot(s):
                    break
                parents_snaps[idx].add(s)
        # Test them as possible intermediate snapshot bases.
        # We test them from highest to lowest level. High level ones are more
        # likely to result in small deltas.
        floor = None
        for idx, snaps in sorted(parents_snaps.items(), reverse=True):
            siblings = set()
            for s in snaps:
                siblings.update(snapshots[s])
            # Before considering making a new intermediate snapshot, we check
            # if an existing snapshot, child of a base we consider, would be
            # suitable.
            #
            # It gives a chance to reuse a delta chain "unrelated" to the
            # current revision instead of starting our own. Without such
            # re-use, topological branches would keep reopening new chains,
            # creating more and more snapshots as the repository grows.

            if floor is not None:
                # We only do this for siblings created after the one in our
                # parent's delta chain. Those created before have fewer chances
                # to be valid bases since our ancestors had to create a new
                # snapshot.
                siblings = [r for r in siblings if floor < r]
            yield tuple(sorted(siblings))
            # then test the base from our parent's delta chain.
            yield tuple(sorted(snaps))
            floor = min(snaps)
        # No suitable base found in the parent chain, search if any full
        # snapshots emitted since parent's base would be a suitable base for an
        # intermediate snapshot.
        #
        # It gives a chance to reuse a delta chain unrelated to the current
        # revisions instead of starting our own. Without such re-use,
        # topological branches would keep reopening new full chains, creating
        # more and more snapshots as the repository grows.
        yield tuple(snapshots[nullrev])

    if not sparse:
        # other approaches failed, try against prev to hopefully save us a
        # fulltext.
        yield (prev,)
754
748
class deltacomputer(object):
    """Compute and select deltas for revisions stored in one revlog.

    ``finddeltainfo`` is the main entry point: it drives the candidate
    groups from ``_candidategroups`` and returns the best acceptable
    ``_deltainfo`` (or a full snapshot when no delta qualifies).
    """

    def __init__(self, revlog):
        self.revlog = revlog

    def buildtext(self, revinfo, fh):
        """Builds a fulltext version of a revision

        revinfo: _revisioninfo instance that contains all needed info
        fh:      file handle to either the .i or the .d revlog file,
                 depending on whether it is inlined or not

        The result is memoized in ``revinfo.btext[0]``.
        """
        btext = revinfo.btext
        if btext[0] is not None:
            return btext[0]

        revlog = self.revlog
        cachedelta = revinfo.cachedelta
        baserev = cachedelta[0]
        delta = cachedelta[1]

        fulltext = btext[0] = _textfromdelta(fh, revlog, baserev, delta,
                                             revinfo.p1, revinfo.p2,
                                             revinfo.flags, revinfo.node)
        return fulltext

    def _builddeltadiff(self, base, revinfo, fh):
        """Return the raw delta text between `base` and revinfo's fulltext."""
        revlog = self.revlog
        t = self.buildtext(revinfo, fh)
        if revlog.iscensored(base):
            # deltas based on a censored revision must replace the
            # full content in one patch, so delta works everywhere
            header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
            delta = header + t
        else:
            ptext = revlog.revision(base, _df=fh, raw=True)
            delta = mdiff.textdiff(ptext, t)

        return delta

    def _builddeltainfo(self, revinfo, base, fh):
        """Build a ``_deltainfo`` describing revinfo delta'ed against `base`.

        Reuses the cached delta when its base (after skipping empty deltas)
        matches `base`; otherwise computes a fresh diff.
        """
        # can we use the cached delta?
        delta = None
        if revinfo.cachedelta:
            cachebase, cachediff = revinfo.cachedelta
            # check if the diff still applies: skip empty intermediate deltas
            currentbase = cachebase
            while (currentbase != nullrev
                   and currentbase != base
                   and self.revlog.length(currentbase) == 0):
                currentbase = self.revlog.deltaparent(currentbase)
            if currentbase == base:
                delta = revinfo.cachedelta[1]
        if delta is None:
            delta = self._builddeltadiff(base, revinfo, fh)
        revlog = self.revlog
        header, data = revlog.compress(delta)
        deltalen = len(header) + len(data)
        chainbase = revlog.chainbase(base)
        offset = revlog.end(len(revlog) - 1)
        # distance from the start of the chain's base to the end of this delta
        dist = deltalen + offset - revlog.start(chainbase)
        if revlog._generaldelta:
            deltabase = base
        else:
            deltabase = chainbase
        chainlen, compresseddeltalen = revlog._chaininfo(base)
        chainlen += 1
        compresseddeltalen += deltalen

        snapshotdepth = None
        if deltabase == nullrev:
            snapshotdepth = 0
        elif revlog._sparserevlog and revlog.issnapshot(deltabase):
            # A delta chain should always be one full snapshot,
            # zero or more semi-snapshots, and zero or more deltas
            p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
            # (issnapshot(deltabase) already established by the elif above)
            if deltabase not in (p1, p2):
                snapshotdepth = len(revlog._deltachain(deltabase)[0])

        return _deltainfo(dist, deltalen, (header, data), deltabase,
                          chainbase, chainlen, compresseddeltalen,
                          snapshotdepth)

    def _fullsnapshotinfo(self, fh, revinfo):
        """Build a ``_deltainfo`` storing revinfo as a full snapshot."""
        curr = len(self.revlog)
        rawtext = self.buildtext(revinfo, fh)
        data = self.revlog.compress(rawtext)
        compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
        deltabase = chainbase = curr
        snapshotdepth = 0
        chainlen = 1

        return _deltainfo(dist, deltalen, data, deltabase,
                          chainbase, chainlen, compresseddeltalen,
                          snapshotdepth)

    def finddeltainfo(self, revinfo, fh):
        """Find an acceptable delta against a candidate revision

        revinfo: information about the revision (instance of _revisioninfo)
        fh: file handle to either the .i or the .d revlog file,
            depending on whether it is inlined or not

        Returns the first acceptable candidate revision, as ordered by
        _candidategroups

        If no suitable deltabase is found, we return delta info for a full
        snapshot.
        """
        if not revinfo.textlen:
            # empty revisions are always stored as full snapshots
            return self._fullsnapshotinfo(fh, revinfo)

        # no delta for flag processor revision (see "candelta" for why)
        # not calling candelta since only one revision needs test, also to
        # avoid overhead fetching flags again.
        if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
            return self._fullsnapshotinfo(fh, revinfo)

        cachedelta = revinfo.cachedelta
        p1 = revinfo.p1
        p2 = revinfo.p2
        revlog = self.revlog

        deltainfo = None
        p1r, p2r = revlog.rev(p1), revlog.rev(p2)
        groups = _candidategroups(self.revlog, revinfo.textlen,
                                  p1r, p2r, cachedelta)
        candidaterevs = next(groups)
        while candidaterevs is not None:
            nominateddeltas = []
            if deltainfo is not None:
                # if we already found a good delta,
                # challenge it against refined candidates
                nominateddeltas.append(deltainfo)
            for candidaterev in candidaterevs:
                candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
                if isgooddeltainfo(self.revlog, candidatedelta, revinfo):
                    nominateddeltas.append(candidatedelta)
            if nominateddeltas:
                # keep the smallest delta found so far
                deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
            if deltainfo is not None:
                # feed the chosen base back so the generator can refine
                candidaterevs = groups.send(deltainfo.base)
            else:
                candidaterevs = next(groups)

        if deltainfo is None:
            deltainfo = self._fullsnapshotinfo(fh, revinfo)
        return deltainfo
General Comments 0
You need to be logged in to leave comments. Login now