##// END OF EJS Templates
sparse-revlog: stop using a heap to track gaps...
Boris Feld -
r40643:54de2340 default
parent child Browse files
Show More
@@ -1,896 +1,897 b''
1 # revlogdeltas.py - Logic around delta computation for revlog
1 # revlogdeltas.py - Logic around delta computation for revlog
2 #
2 #
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 # Copyright 2018 Octobus <contact@octobus.net>
4 # Copyright 2018 Octobus <contact@octobus.net>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8 """Helper class to compute deltas stored inside revlogs"""
8 """Helper class to compute deltas stored inside revlogs"""
9
9
10 from __future__ import absolute_import
10 from __future__ import absolute_import
11
11
12 import collections
12 import collections
13 import heapq
13 import heapq
14 import struct
14 import struct
15
15
16 # import stuff from node for others to import from revlog
16 # import stuff from node for others to import from revlog
17 from ..node import (
17 from ..node import (
18 nullrev,
18 nullrev,
19 )
19 )
20 from ..i18n import _
20 from ..i18n import _
21
21
22 from .constants import (
22 from .constants import (
23 REVIDX_ISCENSORED,
23 REVIDX_ISCENSORED,
24 REVIDX_RAWTEXT_CHANGING_FLAGS,
24 REVIDX_RAWTEXT_CHANGING_FLAGS,
25 )
25 )
26
26
27 from ..thirdparty import (
27 from ..thirdparty import (
28 attr,
28 attr,
29 )
29 )
30
30
31 from .. import (
31 from .. import (
32 error,
32 error,
33 mdiff,
33 mdiff,
34 )
34 )
35
35
36 # maximum <delta-chain-data>/<revision-text-length> ratio
36 # maximum <delta-chain-data>/<revision-text-length> ratio
37 LIMIT_DELTA2TEXT = 2
37 LIMIT_DELTA2TEXT = 2
38
38
39 class _testrevlog(object):
39 class _testrevlog(object):
40 """minimalist fake revlog to use in doctests"""
40 """minimalist fake revlog to use in doctests"""
41
41
42 def __init__(self, data, density=0.5, mingap=0):
42 def __init__(self, data, density=0.5, mingap=0):
43 """data is an list of revision payload boundaries"""
43 """data is an list of revision payload boundaries"""
44 self._data = data
44 self._data = data
45 self._srdensitythreshold = density
45 self._srdensitythreshold = density
46 self._srmingapsize = mingap
46 self._srmingapsize = mingap
47
47
48 def start(self, rev):
48 def start(self, rev):
49 if rev == 0:
49 if rev == 0:
50 return 0
50 return 0
51 return self._data[rev - 1]
51 return self._data[rev - 1]
52
52
53 def end(self, rev):
53 def end(self, rev):
54 return self._data[rev]
54 return self._data[rev]
55
55
56 def length(self, rev):
56 def length(self, rev):
57 return self.end(rev) - self.start(rev)
57 return self.end(rev) - self.start(rev)
58
58
59 def __len__(self):
59 def __len__(self):
60 return len(self._data)
60 return len(self._data)
61
61
62 def slicechunk(revlog, revs, targetsize=None):
62 def slicechunk(revlog, revs, targetsize=None):
63 """slice revs to reduce the amount of unrelated data to be read from disk.
63 """slice revs to reduce the amount of unrelated data to be read from disk.
64
64
65 ``revs`` is sliced into groups that should be read in one time.
65 ``revs`` is sliced into groups that should be read in one time.
66 Assume that revs are sorted.
66 Assume that revs are sorted.
67
67
68 The initial chunk is sliced until the overall density (payload/chunks-span
68 The initial chunk is sliced until the overall density (payload/chunks-span
69 ratio) is above `revlog._srdensitythreshold`. No gap smaller than
69 ratio) is above `revlog._srdensitythreshold`. No gap smaller than
70 `revlog._srmingapsize` is skipped.
70 `revlog._srmingapsize` is skipped.
71
71
72 If `targetsize` is set, no chunk larger than `targetsize` will be yield.
72 If `targetsize` is set, no chunk larger than `targetsize` will be yield.
73 For consistency with other slicing choice, this limit won't go lower than
73 For consistency with other slicing choice, this limit won't go lower than
74 `revlog._srmingapsize`.
74 `revlog._srmingapsize`.
75
75
76 If individual revisions chunk are larger than this limit, they will still
76 If individual revisions chunk are larger than this limit, they will still
77 be raised individually.
77 be raised individually.
78
78
79 >>> revlog = _testrevlog([
79 >>> revlog = _testrevlog([
80 ... 5, #00 (5)
80 ... 5, #00 (5)
81 ... 10, #01 (5)
81 ... 10, #01 (5)
82 ... 12, #02 (2)
82 ... 12, #02 (2)
83 ... 12, #03 (empty)
83 ... 12, #03 (empty)
84 ... 27, #04 (15)
84 ... 27, #04 (15)
85 ... 31, #05 (4)
85 ... 31, #05 (4)
86 ... 31, #06 (empty)
86 ... 31, #06 (empty)
87 ... 42, #07 (11)
87 ... 42, #07 (11)
88 ... 47, #08 (5)
88 ... 47, #08 (5)
89 ... 47, #09 (empty)
89 ... 47, #09 (empty)
90 ... 48, #10 (1)
90 ... 48, #10 (1)
91 ... 51, #11 (3)
91 ... 51, #11 (3)
92 ... 74, #12 (23)
92 ... 74, #12 (23)
93 ... 85, #13 (11)
93 ... 85, #13 (11)
94 ... 86, #14 (1)
94 ... 86, #14 (1)
95 ... 91, #15 (5)
95 ... 91, #15 (5)
96 ... ])
96 ... ])
97
97
98 >>> list(slicechunk(revlog, list(range(16))))
98 >>> list(slicechunk(revlog, list(range(16))))
99 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
99 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
100 >>> list(slicechunk(revlog, [0, 15]))
100 >>> list(slicechunk(revlog, [0, 15]))
101 [[0], [15]]
101 [[0], [15]]
102 >>> list(slicechunk(revlog, [0, 11, 15]))
102 >>> list(slicechunk(revlog, [0, 11, 15]))
103 [[0], [11], [15]]
103 [[0], [11], [15]]
104 >>> list(slicechunk(revlog, [0, 11, 13, 15]))
104 >>> list(slicechunk(revlog, [0, 11, 13, 15]))
105 [[0], [11, 13, 15]]
105 [[0], [11, 13, 15]]
106 >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
106 >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
107 [[1, 2], [5, 8, 10, 11], [14]]
107 [[1, 2], [5, 8, 10, 11], [14]]
108
108
109 Slicing with a maximum chunk size
109 Slicing with a maximum chunk size
110 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
110 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
111 [[0], [11], [13], [15]]
111 [[0], [11], [13], [15]]
112 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
112 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
113 [[0], [11], [13, 15]]
113 [[0], [11], [13, 15]]
114 """
114 """
115 if targetsize is not None:
115 if targetsize is not None:
116 targetsize = max(targetsize, revlog._srmingapsize)
116 targetsize = max(targetsize, revlog._srmingapsize)
117 # targetsize should not be specified when evaluating delta candidates:
117 # targetsize should not be specified when evaluating delta candidates:
118 # * targetsize is used to ensure we stay within specification when reading,
118 # * targetsize is used to ensure we stay within specification when reading,
119 for chunk in _slicechunktodensity(revlog, revs,
119 for chunk in _slicechunktodensity(revlog, revs,
120 revlog._srdensitythreshold,
120 revlog._srdensitythreshold,
121 revlog._srmingapsize):
121 revlog._srmingapsize):
122 for subchunk in _slicechunktosize(revlog, chunk, targetsize):
122 for subchunk in _slicechunktosize(revlog, chunk, targetsize):
123 yield subchunk
123 yield subchunk
124
124
125 def _slicechunktosize(revlog, revs, targetsize=None):
125 def _slicechunktosize(revlog, revs, targetsize=None):
126 """slice revs to match the target size
126 """slice revs to match the target size
127
127
128 This is intended to be used on chunk that density slicing selected by that
128 This is intended to be used on chunk that density slicing selected by that
129 are still too large compared to the read garantee of revlog. This might
129 are still too large compared to the read garantee of revlog. This might
130 happens when "minimal gap size" interrupted the slicing or when chain are
130 happens when "minimal gap size" interrupted the slicing or when chain are
131 built in a way that create large blocks next to each other.
131 built in a way that create large blocks next to each other.
132
132
133 >>> revlog = _testrevlog([
133 >>> revlog = _testrevlog([
134 ... 3, #0 (3)
134 ... 3, #0 (3)
135 ... 5, #1 (2)
135 ... 5, #1 (2)
136 ... 6, #2 (1)
136 ... 6, #2 (1)
137 ... 8, #3 (2)
137 ... 8, #3 (2)
138 ... 8, #4 (empty)
138 ... 8, #4 (empty)
139 ... 11, #5 (3)
139 ... 11, #5 (3)
140 ... 12, #6 (1)
140 ... 12, #6 (1)
141 ... 13, #7 (1)
141 ... 13, #7 (1)
142 ... 14, #8 (1)
142 ... 14, #8 (1)
143 ... ])
143 ... ])
144
144
145 Cases where chunk is already small enough
145 Cases where chunk is already small enough
146 >>> list(_slicechunktosize(revlog, [0], 3))
146 >>> list(_slicechunktosize(revlog, [0], 3))
147 [[0]]
147 [[0]]
148 >>> list(_slicechunktosize(revlog, [6, 7], 3))
148 >>> list(_slicechunktosize(revlog, [6, 7], 3))
149 [[6, 7]]
149 [[6, 7]]
150 >>> list(_slicechunktosize(revlog, [0], None))
150 >>> list(_slicechunktosize(revlog, [0], None))
151 [[0]]
151 [[0]]
152 >>> list(_slicechunktosize(revlog, [6, 7], None))
152 >>> list(_slicechunktosize(revlog, [6, 7], None))
153 [[6, 7]]
153 [[6, 7]]
154
154
155 cases where we need actual slicing
155 cases where we need actual slicing
156 >>> list(_slicechunktosize(revlog, [0, 1], 3))
156 >>> list(_slicechunktosize(revlog, [0, 1], 3))
157 [[0], [1]]
157 [[0], [1]]
158 >>> list(_slicechunktosize(revlog, [1, 3], 3))
158 >>> list(_slicechunktosize(revlog, [1, 3], 3))
159 [[1], [3]]
159 [[1], [3]]
160 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
160 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
161 [[1, 2], [3]]
161 [[1, 2], [3]]
162 >>> list(_slicechunktosize(revlog, [3, 5], 3))
162 >>> list(_slicechunktosize(revlog, [3, 5], 3))
163 [[3], [5]]
163 [[3], [5]]
164 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
164 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
165 [[3], [5]]
165 [[3], [5]]
166 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
166 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
167 [[5], [6, 7, 8]]
167 [[5], [6, 7, 8]]
168 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
168 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
169 [[0], [1, 2], [3], [5], [6, 7, 8]]
169 [[0], [1, 2], [3], [5], [6, 7, 8]]
170
170
171 Case with too large individual chunk (must return valid chunk)
171 Case with too large individual chunk (must return valid chunk)
172 >>> list(_slicechunktosize(revlog, [0, 1], 2))
172 >>> list(_slicechunktosize(revlog, [0, 1], 2))
173 [[0], [1]]
173 [[0], [1]]
174 >>> list(_slicechunktosize(revlog, [1, 3], 1))
174 >>> list(_slicechunktosize(revlog, [1, 3], 1))
175 [[1], [3]]
175 [[1], [3]]
176 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
176 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
177 [[3], [5]]
177 [[3], [5]]
178 """
178 """
179 assert targetsize is None or 0 <= targetsize
179 assert targetsize is None or 0 <= targetsize
180 if targetsize is None or segmentspan(revlog, revs) <= targetsize:
180 if targetsize is None or segmentspan(revlog, revs) <= targetsize:
181 yield revs
181 yield revs
182 return
182 return
183
183
184 startrevidx = 0
184 startrevidx = 0
185 startdata = revlog.start(revs[0])
185 startdata = revlog.start(revs[0])
186 endrevidx = 0
186 endrevidx = 0
187 iterrevs = enumerate(revs)
187 iterrevs = enumerate(revs)
188 next(iterrevs) # skip first rev.
188 next(iterrevs) # skip first rev.
189 for idx, r in iterrevs:
189 for idx, r in iterrevs:
190 span = revlog.end(r) - startdata
190 span = revlog.end(r) - startdata
191 if span <= targetsize:
191 if span <= targetsize:
192 endrevidx = idx
192 endrevidx = idx
193 else:
193 else:
194 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx + 1)
194 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx + 1)
195 if chunk:
195 if chunk:
196 yield chunk
196 yield chunk
197 startrevidx = idx
197 startrevidx = idx
198 startdata = revlog.start(r)
198 startdata = revlog.start(r)
199 endrevidx = idx
199 endrevidx = idx
200 yield _trimchunk(revlog, revs, startrevidx)
200 yield _trimchunk(revlog, revs, startrevidx)
201
201
202 def _slicechunktodensity(revlog, revs, targetdensity=0.5,
202 def _slicechunktodensity(revlog, revs, targetdensity=0.5,
203 mingapsize=0):
203 mingapsize=0):
204 """slice revs to reduce the amount of unrelated data to be read from disk.
204 """slice revs to reduce the amount of unrelated data to be read from disk.
205
205
206 ``revs`` is sliced into groups that should be read in one time.
206 ``revs`` is sliced into groups that should be read in one time.
207 Assume that revs are sorted.
207 Assume that revs are sorted.
208
208
209 The initial chunk is sliced until the overall density (payload/chunks-span
209 The initial chunk is sliced until the overall density (payload/chunks-span
210 ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
210 ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
211 skipped.
211 skipped.
212
212
213 >>> revlog = _testrevlog([
213 >>> revlog = _testrevlog([
214 ... 5, #00 (5)
214 ... 5, #00 (5)
215 ... 10, #01 (5)
215 ... 10, #01 (5)
216 ... 12, #02 (2)
216 ... 12, #02 (2)
217 ... 12, #03 (empty)
217 ... 12, #03 (empty)
218 ... 27, #04 (15)
218 ... 27, #04 (15)
219 ... 31, #05 (4)
219 ... 31, #05 (4)
220 ... 31, #06 (empty)
220 ... 31, #06 (empty)
221 ... 42, #07 (11)
221 ... 42, #07 (11)
222 ... 47, #08 (5)
222 ... 47, #08 (5)
223 ... 47, #09 (empty)
223 ... 47, #09 (empty)
224 ... 48, #10 (1)
224 ... 48, #10 (1)
225 ... 51, #11 (3)
225 ... 51, #11 (3)
226 ... 74, #12 (23)
226 ... 74, #12 (23)
227 ... 85, #13 (11)
227 ... 85, #13 (11)
228 ... 86, #14 (1)
228 ... 86, #14 (1)
229 ... 91, #15 (5)
229 ... 91, #15 (5)
230 ... ])
230 ... ])
231
231
232 >>> list(_slicechunktodensity(revlog, list(range(16))))
232 >>> list(_slicechunktodensity(revlog, list(range(16))))
233 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
233 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
234 >>> list(_slicechunktodensity(revlog, [0, 15]))
234 >>> list(_slicechunktodensity(revlog, [0, 15]))
235 [[0], [15]]
235 [[0], [15]]
236 >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
236 >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
237 [[0], [11], [15]]
237 [[0], [11], [15]]
238 >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
238 >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
239 [[0], [11, 13, 15]]
239 [[0], [11, 13, 15]]
240 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
240 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
241 [[1, 2], [5, 8, 10, 11], [14]]
241 [[1, 2], [5, 8, 10, 11], [14]]
242 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
242 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
243 ... mingapsize=20))
243 ... mingapsize=20))
244 [[1, 2, 3, 5, 8, 10, 11], [14]]
244 [[1, 2, 3, 5, 8, 10, 11], [14]]
245 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
245 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
246 ... targetdensity=0.95))
246 ... targetdensity=0.95))
247 [[1, 2], [5], [8, 10, 11], [14]]
247 [[1, 2], [5], [8, 10, 11], [14]]
248 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
248 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
249 ... targetdensity=0.95, mingapsize=12))
249 ... targetdensity=0.95, mingapsize=12))
250 [[1, 2], [5, 8, 10, 11], [14]]
250 [[1, 2], [5, 8, 10, 11], [14]]
251 """
251 """
252 start = revlog.start
252 start = revlog.start
253 length = revlog.length
253 length = revlog.length
254
254
255 if len(revs) <= 1:
255 if len(revs) <= 1:
256 yield revs
256 yield revs
257 return
257 return
258
258
259 deltachainspan = segmentspan(revlog, revs)
259 deltachainspan = segmentspan(revlog, revs)
260
260
261 if deltachainspan < mingapsize:
261 if deltachainspan < mingapsize:
262 yield revs
262 yield revs
263 return
263 return
264
264
265 readdata = deltachainspan
265 readdata = deltachainspan
266 chainpayload = sum(length(r) for r in revs)
266 chainpayload = sum(length(r) for r in revs)
267
267
268 if deltachainspan:
268 if deltachainspan:
269 density = chainpayload / float(deltachainspan)
269 density = chainpayload / float(deltachainspan)
270 else:
270 else:
271 density = 1.0
271 density = 1.0
272
272
273 if density >= targetdensity:
273 if density >= targetdensity:
274 yield revs
274 yield revs
275 return
275 return
276
276
277 # Store the gaps in a heap to have them sorted by decreasing size
277 # Store the gaps in a heap to have them sorted by decreasing size
278 gapsheap = []
278 gaps = []
279 heapq.heapify(gapsheap)
280 prevend = None
279 prevend = None
281 for i, rev in enumerate(revs):
280 for i, rev in enumerate(revs):
282 revstart = start(rev)
281 revstart = start(rev)
283 revlen = length(rev)
282 revlen = length(rev)
284
283
285 # Skip empty revisions to form larger holes
284 # Skip empty revisions to form larger holes
286 if revlen == 0:
285 if revlen == 0:
287 continue
286 continue
288
287
289 if prevend is not None:
288 if prevend is not None:
290 gapsize = revstart - prevend
289 gapsize = revstart - prevend
291 # only consider holes that are large enough
290 # only consider holes that are large enough
292 if gapsize > mingapsize:
291 if gapsize > mingapsize:
293 heapq.heappush(gapsheap, (-gapsize, i))
292 gaps.append((gapsize, i))
294
293
295 prevend = revstart + revlen
294 prevend = revstart + revlen
295 # sort the gaps to pop them from largest to small
296 gaps.sort()
296
297
297 # Collect the indices of the largest holes until the density is acceptable
298 # Collect the indices of the largest holes until the density is acceptable
298 indicesheap = []
299 indicesheap = []
299 heapq.heapify(indicesheap)
300 heapq.heapify(indicesheap)
300 while gapsheap and density < targetdensity:
301 while gaps and density < targetdensity:
301 oppgapsize, gapidx = heapq.heappop(gapsheap)
302 gapsize, gapidx = gaps.pop()
302
303
303 heapq.heappush(indicesheap, gapidx)
304 heapq.heappush(indicesheap, gapidx)
304
305
305 # the gap sizes are stored as negatives to be sorted decreasingly
306 # the gap sizes are stored as negatives to be sorted decreasingly
306 # by the heap
307 # by the heap
307 readdata -= (-oppgapsize)
308 readdata -= gapsize
308 if readdata > 0:
309 if readdata > 0:
309 density = chainpayload / float(readdata)
310 density = chainpayload / float(readdata)
310 else:
311 else:
311 density = 1.0
312 density = 1.0
312
313
313 # Cut the revs at collected indices
314 # Cut the revs at collected indices
314 previdx = 0
315 previdx = 0
315 while indicesheap:
316 while indicesheap:
316 idx = heapq.heappop(indicesheap)
317 idx = heapq.heappop(indicesheap)
317
318
318 chunk = _trimchunk(revlog, revs, previdx, idx)
319 chunk = _trimchunk(revlog, revs, previdx, idx)
319 if chunk:
320 if chunk:
320 yield chunk
321 yield chunk
321
322
322 previdx = idx
323 previdx = idx
323
324
324 chunk = _trimchunk(revlog, revs, previdx)
325 chunk = _trimchunk(revlog, revs, previdx)
325 if chunk:
326 if chunk:
326 yield chunk
327 yield chunk
327
328
328 def _trimchunk(revlog, revs, startidx, endidx=None):
329 def _trimchunk(revlog, revs, startidx, endidx=None):
329 """returns revs[startidx:endidx] without empty trailing revs
330 """returns revs[startidx:endidx] without empty trailing revs
330
331
331 Doctest Setup
332 Doctest Setup
332 >>> revlog = _testrevlog([
333 >>> revlog = _testrevlog([
333 ... 5, #0
334 ... 5, #0
334 ... 10, #1
335 ... 10, #1
335 ... 12, #2
336 ... 12, #2
336 ... 12, #3 (empty)
337 ... 12, #3 (empty)
337 ... 17, #4
338 ... 17, #4
338 ... 21, #5
339 ... 21, #5
339 ... 21, #6 (empty)
340 ... 21, #6 (empty)
340 ... ])
341 ... ])
341
342
342 Contiguous cases:
343 Contiguous cases:
343 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
344 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
344 [0, 1, 2, 3, 4, 5]
345 [0, 1, 2, 3, 4, 5]
345 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
346 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
346 [0, 1, 2, 3, 4]
347 [0, 1, 2, 3, 4]
347 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
348 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
348 [0, 1, 2]
349 [0, 1, 2]
349 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
350 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
350 [2]
351 [2]
351 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
352 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
352 [3, 4, 5]
353 [3, 4, 5]
353 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
354 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
354 [3, 4]
355 [3, 4]
355
356
356 Discontiguous cases:
357 Discontiguous cases:
357 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
358 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
358 [1, 3, 5]
359 [1, 3, 5]
359 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
360 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
360 [1]
361 [1]
361 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
362 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
362 [3, 5]
363 [3, 5]
363 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
364 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
364 [3, 5]
365 [3, 5]
365 """
366 """
366 length = revlog.length
367 length = revlog.length
367
368
368 if endidx is None:
369 if endidx is None:
369 endidx = len(revs)
370 endidx = len(revs)
370
371
371 # If we have a non-emtpy delta candidate, there are nothing to trim
372 # If we have a non-emtpy delta candidate, there are nothing to trim
372 if revs[endidx - 1] < len(revlog):
373 if revs[endidx - 1] < len(revlog):
373 # Trim empty revs at the end, except the very first revision of a chain
374 # Trim empty revs at the end, except the very first revision of a chain
374 while (endidx > 1
375 while (endidx > 1
375 and endidx > startidx
376 and endidx > startidx
376 and length(revs[endidx - 1]) == 0):
377 and length(revs[endidx - 1]) == 0):
377 endidx -= 1
378 endidx -= 1
378
379
379 return revs[startidx:endidx]
380 return revs[startidx:endidx]
380
381
381 def segmentspan(revlog, revs):
382 def segmentspan(revlog, revs):
382 """Get the byte span of a segment of revisions
383 """Get the byte span of a segment of revisions
383
384
384 revs is a sorted array of revision numbers
385 revs is a sorted array of revision numbers
385
386
386 >>> revlog = _testrevlog([
387 >>> revlog = _testrevlog([
387 ... 5, #0
388 ... 5, #0
388 ... 10, #1
389 ... 10, #1
389 ... 12, #2
390 ... 12, #2
390 ... 12, #3 (empty)
391 ... 12, #3 (empty)
391 ... 17, #4
392 ... 17, #4
392 ... ])
393 ... ])
393
394
394 >>> segmentspan(revlog, [0, 1, 2, 3, 4])
395 >>> segmentspan(revlog, [0, 1, 2, 3, 4])
395 17
396 17
396 >>> segmentspan(revlog, [0, 4])
397 >>> segmentspan(revlog, [0, 4])
397 17
398 17
398 >>> segmentspan(revlog, [3, 4])
399 >>> segmentspan(revlog, [3, 4])
399 5
400 5
400 >>> segmentspan(revlog, [1, 2, 3,])
401 >>> segmentspan(revlog, [1, 2, 3,])
401 7
402 7
402 >>> segmentspan(revlog, [1, 3])
403 >>> segmentspan(revlog, [1, 3])
403 7
404 7
404 """
405 """
405 if not revs:
406 if not revs:
406 return 0
407 return 0
407 end = revlog.end(revs[-1])
408 end = revlog.end(revs[-1])
408 return end - revlog.start(revs[0])
409 return end - revlog.start(revs[0])
409
410
410 def _textfromdelta(fh, revlog, baserev, delta, p1, p2, flags, expectednode):
411 def _textfromdelta(fh, revlog, baserev, delta, p1, p2, flags, expectednode):
411 """build full text from a (base, delta) pair and other metadata"""
412 """build full text from a (base, delta) pair and other metadata"""
412 # special case deltas which replace entire base; no need to decode
413 # special case deltas which replace entire base; no need to decode
413 # base revision. this neatly avoids censored bases, which throw when
414 # base revision. this neatly avoids censored bases, which throw when
414 # they're decoded.
415 # they're decoded.
415 hlen = struct.calcsize(">lll")
416 hlen = struct.calcsize(">lll")
416 if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
417 if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
417 len(delta) - hlen):
418 len(delta) - hlen):
418 fulltext = delta[hlen:]
419 fulltext = delta[hlen:]
419 else:
420 else:
420 # deltabase is rawtext before changed by flag processors, which is
421 # deltabase is rawtext before changed by flag processors, which is
421 # equivalent to non-raw text
422 # equivalent to non-raw text
422 basetext = revlog.revision(baserev, _df=fh, raw=False)
423 basetext = revlog.revision(baserev, _df=fh, raw=False)
423 fulltext = mdiff.patch(basetext, delta)
424 fulltext = mdiff.patch(basetext, delta)
424
425
425 try:
426 try:
426 res = revlog._processflags(fulltext, flags, 'read', raw=True)
427 res = revlog._processflags(fulltext, flags, 'read', raw=True)
427 fulltext, validatehash = res
428 fulltext, validatehash = res
428 if validatehash:
429 if validatehash:
429 revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
430 revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
430 if flags & REVIDX_ISCENSORED:
431 if flags & REVIDX_ISCENSORED:
431 raise error.StorageError(_('node %s is not censored') %
432 raise error.StorageError(_('node %s is not censored') %
432 expectednode)
433 expectednode)
433 except error.CensoredNodeError:
434 except error.CensoredNodeError:
434 # must pass the censored index flag to add censored revisions
435 # must pass the censored index flag to add censored revisions
435 if not flags & REVIDX_ISCENSORED:
436 if not flags & REVIDX_ISCENSORED:
436 raise
437 raise
437 return fulltext
438 return fulltext
438
439
439 @attr.s(slots=True, frozen=True)
440 @attr.s(slots=True, frozen=True)
440 class _deltainfo(object):
441 class _deltainfo(object):
441 distance = attr.ib()
442 distance = attr.ib()
442 deltalen = attr.ib()
443 deltalen = attr.ib()
443 data = attr.ib()
444 data = attr.ib()
444 base = attr.ib()
445 base = attr.ib()
445 chainbase = attr.ib()
446 chainbase = attr.ib()
446 chainlen = attr.ib()
447 chainlen = attr.ib()
447 compresseddeltalen = attr.ib()
448 compresseddeltalen = attr.ib()
448 snapshotdepth = attr.ib()
449 snapshotdepth = attr.ib()
449
450
450 def isgooddeltainfo(revlog, deltainfo, revinfo):
451 def isgooddeltainfo(revlog, deltainfo, revinfo):
451 """Returns True if the given delta is good. Good means that it is within
452 """Returns True if the given delta is good. Good means that it is within
452 the disk span, disk size, and chain length bounds that we know to be
453 the disk span, disk size, and chain length bounds that we know to be
453 performant."""
454 performant."""
454 if deltainfo is None:
455 if deltainfo is None:
455 return False
456 return False
456
457
457 # - 'deltainfo.distance' is the distance from the base revision --
458 # - 'deltainfo.distance' is the distance from the base revision --
458 # bounding it limits the amount of I/O we need to do.
459 # bounding it limits the amount of I/O we need to do.
459 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
460 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
460 # deltas we need to apply -- bounding it limits the amount of CPU
461 # deltas we need to apply -- bounding it limits the amount of CPU
461 # we consume.
462 # we consume.
462
463
463 textlen = revinfo.textlen
464 textlen = revinfo.textlen
464 defaultmax = textlen * 4
465 defaultmax = textlen * 4
465 maxdist = revlog._maxdeltachainspan
466 maxdist = revlog._maxdeltachainspan
466 if not maxdist:
467 if not maxdist:
467 maxdist = deltainfo.distance # ensure the conditional pass
468 maxdist = deltainfo.distance # ensure the conditional pass
468 maxdist = max(maxdist, defaultmax)
469 maxdist = max(maxdist, defaultmax)
469
470
470 # Bad delta from read span:
471 # Bad delta from read span:
471 #
472 #
472 # If the span of data read is larger than the maximum allowed.
473 # If the span of data read is larger than the maximum allowed.
473 #
474 #
474 # In the sparse-revlog case, we rely on the associated "sparse reading"
475 # In the sparse-revlog case, we rely on the associated "sparse reading"
475 # to avoid issue related to the span of data. In theory, it would be
476 # to avoid issue related to the span of data. In theory, it would be
476 # possible to build pathological revlog where delta pattern would lead
477 # possible to build pathological revlog where delta pattern would lead
477 # to too many reads. However, they do not happen in practice at all. So
478 # to too many reads. However, they do not happen in practice at all. So
478 # we skip the span check entirely.
479 # we skip the span check entirely.
479 if not revlog._sparserevlog and maxdist < deltainfo.distance:
480 if not revlog._sparserevlog and maxdist < deltainfo.distance:
480 return False
481 return False
481
482
482 # Bad delta from new delta size:
483 # Bad delta from new delta size:
483 #
484 #
484 # If the delta size is larger than the target text, storing the
485 # If the delta size is larger than the target text, storing the
485 # delta will be inefficient.
486 # delta will be inefficient.
486 if textlen < deltainfo.deltalen:
487 if textlen < deltainfo.deltalen:
487 return False
488 return False
488
489
489 # Bad delta from cumulated payload size:
490 # Bad delta from cumulated payload size:
490 #
491 #
491 # If the sum of delta get larger than K * target text length.
492 # If the sum of delta get larger than K * target text length.
492 if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
493 if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
493 return False
494 return False
494
495
495 # Bad delta from chain length:
496 # Bad delta from chain length:
496 #
497 #
497 # If the number of delta in the chain gets too high.
498 # If the number of delta in the chain gets too high.
498 if (revlog._maxchainlen
499 if (revlog._maxchainlen
499 and revlog._maxchainlen < deltainfo.chainlen):
500 and revlog._maxchainlen < deltainfo.chainlen):
500 return False
501 return False
501
502
502 # bad delta from intermediate snapshot size limit
503 # bad delta from intermediate snapshot size limit
503 #
504 #
504 # If an intermediate snapshot size is higher than the limit. The
505 # If an intermediate snapshot size is higher than the limit. The
505 # limit exist to prevent endless chain of intermediate delta to be
506 # limit exist to prevent endless chain of intermediate delta to be
506 # created.
507 # created.
507 if (deltainfo.snapshotdepth is not None and
508 if (deltainfo.snapshotdepth is not None and
508 (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen):
509 (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen):
509 return False
510 return False
510
511
511 # bad delta if new intermediate snapshot is larger than the previous
512 # bad delta if new intermediate snapshot is larger than the previous
512 # snapshot
513 # snapshot
513 if (deltainfo.snapshotdepth
514 if (deltainfo.snapshotdepth
514 and revlog.length(deltainfo.base) < deltainfo.deltalen):
515 and revlog.length(deltainfo.base) < deltainfo.deltalen):
515 return False
516 return False
516
517
517 return True
518 return True
518
519
def _candidategroups(revlog, textlen, p1, p2, cachedelta):
    """Provides groups of revisions to be tested as delta bases.

    This top level function focuses on emitting groups with unique and
    worthwhile content. See _raw_candidate_groups for details about the
    group order.
    """
    # should we try to build a delta at all?
    if not (len(revlog) and revlog._storedeltachains):
        yield None
        return

    deltalength = revlog.length
    deltaparent = revlog.deltaparent
    good = None

    # upper bound on acceptable cumulated delta size for a chain
    deltas_limit = textlen * LIMIT_DELTA2TEXT

    tested = {nullrev}
    candidates = _refinedgroups(revlog, p1, p2, cachedelta)
    while True:
        temptative = candidates.send(good)
        if temptative is None:
            break
        group = []
        for rev in temptative:
            # skip over empty deltas (no need to include them in a chain)
            while (revlog._generaldelta
                   and not (rev == nullrev
                            or rev in tested
                            or deltalength(rev))):
                tested.add(rev)
                rev = deltaparent(rev)
            # filter out revisions we already tested
            if rev in tested:
                continue
            tested.add(rev)
            # filter out delta bases that can never produce a good delta
            if deltas_limit < deltalength(rev):
                continue
            # no need to try a delta against nullrev, this will be done as
            # a last resort.
            if rev == nullrev:
                continue
            # no delta for rawtext-changing revs (see "candelta" for why)
            if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
                continue
            group.append(rev)
        if group:
            # XXX: in the sparse revlog case, group can become large,
            # impacting performances. Some bounding or slicing mechanism
            # would help to reduce this impact.
            good = yield tuple(group)
    yield None
572
573
573 def _findsnapshots(revlog, cache, start_rev):
574 def _findsnapshots(revlog, cache, start_rev):
574 """find snapshot from start_rev to tip"""
575 """find snapshot from start_rev to tip"""
575 deltaparent = revlog.deltaparent
576 deltaparent = revlog.deltaparent
576 issnapshot = revlog.issnapshot
577 issnapshot = revlog.issnapshot
577 for rev in revlog.revs(start_rev):
578 for rev in revlog.revs(start_rev):
578 if issnapshot(rev):
579 if issnapshot(rev):
579 cache[deltaparent(rev)].append(rev)
580 cache[deltaparent(rev)].append(rev)
580
581
def _refinedgroups(revlog, p1, p2, cachedelta):
    """Yield candidate groups, refining the best answer sent back to us."""
    good = None
    # First we try to reuse the delta contained in the bundle (or coming
    # from the source revlog).
    #
    # This logic only applies to general delta repositories and can be
    # disabled through configuration. Disabling reuse of the source delta
    # is useful when we want to make sure we recomputed "optimal" deltas.
    if cachedelta and revlog._generaldelta and revlog._lazydeltabase:
        # Assume what we received from the server is a good choice;
        # build delta will reuse the cache.
        good = yield (cachedelta[0],)
        if good is not None:
            yield None
            return
    for candidates in _rawgroups(revlog, p1, p2, cachedelta):
        good = yield candidates
        if good is not None:
            break

    # If sparse revlog is enabled, we can try to refine the available deltas
    if not revlog._sparserevlog:
        yield None
        return

    # if we have a refinable value, try to refine it
    if good is not None and good not in (p1, p2) and revlog.issnapshot(good):
        # refine snapshot down: walk toward the snapshot's own base as long
        # as the caller keeps accepting what we offer
        previous = None
        while previous != good:
            previous = good
            base = revlog.deltaparent(good)
            if base == nullrev:
                break
            good = yield (base,)
        # refine snapshot up: offer the children snapshots of the accepted
        # base
        #
        # XXX the _findsnapshots call can be expensive and is "duplicated"
        # with the one done in `_rawgroups`. Once we start working on
        # performance, we should make the two logics share this computation.
        snapshots = collections.defaultdict(list)
        _findsnapshots(revlog, snapshots, good + 1)
        previous = None
        while good != previous:
            previous = good
            children = tuple(sorted(snapshots[good]))
            good = yield children

    # we have found nothing
    yield None
631
632
def _rawgroups(revlog, p1, p2, cachedelta):
    """Provides groups of revisions to be tested as delta bases.

    This lower level function focuses on emitting theoretically interesting
    deltas without looking at any practical detail.

    The group order aims at providing fast or small candidates first.
    """
    gdelta = revlog._generaldelta
    sparse = revlog._sparserevlog
    curr = len(revlog)
    prev = curr - 1
    deltachain = lambda rev: revlog._deltachain(rev)[0]

    if gdelta:
        # exclude already lazily-tested bases if any
        parents = [p for p in (p1, p2) if p != nullrev]

        if not revlog._deltabothparents and len(parents) == 2:
            parents.sort()
            # To minimize the chance of having to build a fulltext,
            # pick first whichever parent is closest to us (max rev)
            yield (parents[1],)
            # then the other one (min rev) if the first did not fit
            yield (parents[0],)
        elif len(parents) > 0:
            # Test all parents (1 or 2), and keep the best candidate
            yield parents

    if sparse and parents:
        # NOTE(review): ``parents`` is only bound inside the ``gdelta``
        # branch above; this relies on sparse-revlog implying general
        # delta — confirm that invariant holds for all callers.
        snapshots = collections.defaultdict(list)  # map: base-rev: snapshot-rev
        # See if we can use an existing snapshot in the parent chains to use
        # as a base for a new intermediate-snapshot
        #
        # search for snapshots in the parents' delta chains
        # map: snapshot-level: snapshot-rev
        parents_snaps = collections.defaultdict(set)
        candidate_chains = [deltachain(p) for p in parents]
        for chain in candidate_chains:
            for idx, s in enumerate(chain):
                if not revlog.issnapshot(s):
                    break
                parents_snaps[idx].add(s)
        snapfloor = min(parents_snaps[0]) + 1
        _findsnapshots(revlog, snapshots, snapfloor)
        # search for the highest "unrelated" revision
        #
        # Adding snapshots used by "unrelated" revisions increases the odds
        # we reuse an independent, yet better snapshot chain.
        #
        # XXX instead of building a set of revisions, we could lazily
        # enumerate over the chains. That would be more efficient, however
        # we stick to simple code for now.
        all_revs = set()
        for chain in candidate_chains:
            all_revs.update(chain)
        other = None
        for r in revlog.revs(prev, snapfloor):
            if r not in all_revs:
                other = r
                break
        if other is not None:
            # To avoid unfair competition, we won't use unrelated
            # intermediate snapshots that are deeper than the ones from the
            # parent delta chain.
            max_depth = max(parents_snaps.keys())
            chain = deltachain(other)
            for idx, s in enumerate(chain):
                if s < snapfloor:
                    continue
                if max_depth < idx:
                    break
                if not revlog.issnapshot(s):
                    break
                parents_snaps[idx].add(s)
        # Test them as possible intermediate snapshot bases.
        # We test them from highest to lowest level. High level ones are
        # more likely to result in small deltas.
        floor = None
        for idx, snaps in sorted(parents_snaps.items(), reverse=True):
            siblings = set()
            for s in snaps:
                siblings.update(snapshots[s])
            # Before considering making a new intermediate snapshot, we
            # check if an existing snapshot, child of a base we consider,
            # would be suitable.
            #
            # It gives a chance to reuse a delta chain "unrelated" to the
            # current revision instead of starting our own. Without such
            # re-use, topological branches would keep reopening new chains,
            # creating more and more snapshots as the repository grows.

            if floor is not None:
                # We only do this for siblings created after the one in our
                # parent's delta chain. Those created before have less
                # chance to be valid bases since our ancestors had to
                # create a new snapshot.
                siblings = [r for r in siblings if floor < r]
            yield tuple(sorted(siblings))
            # then test the bases from our parents' delta chains.
            yield tuple(sorted(snaps))
            floor = min(snaps)
        # No suitable base found in the parent chain; search whether any
        # full snapshot emitted since the parents' base would be a suitable
        # base for an intermediate snapshot.
        #
        # It gives a chance to reuse a delta chain unrelated to the current
        # revisions instead of starting our own. Without such re-use,
        # topological branches would keep reopening new full chains,
        # creating more and more snapshots as the repository grows.
        yield tuple(snapshots[nullrev])

    if not sparse:
        # other approaches failed; try against prev to hopefully save us a
        # fulltext.
        yield (prev,)
748
749
class deltacomputer(object):
    """Compute and select delta information for revisions of one revlog."""

    def __init__(self, revlog):
        self.revlog = revlog

    def buildtext(self, revinfo, fh):
        """Builds a fulltext version of a revision

        revinfo: _revisioninfo instance that contains all needed info
        fh:      file handle to either the .i or the .d revlog file,
                 depending on whether it is inlined or not
        """
        # ``btext`` is a one-item list acting as a mutable cache slot
        btext = revinfo.btext
        if btext[0] is not None:
            return btext[0]

        revlog = self.revlog
        cachedelta = revinfo.cachedelta
        baserev = cachedelta[0]
        delta = cachedelta[1]

        fulltext = btext[0] = _textfromdelta(fh, revlog, baserev, delta,
                                             revinfo.p1, revinfo.p2,
                                             revinfo.flags, revinfo.node)
        return fulltext

    def _builddeltadiff(self, base, revinfo, fh):
        """Return the raw delta text of the new revision against ``base``."""
        revlog = self.revlog
        t = self.buildtext(revinfo, fh)
        if revlog.iscensored(base):
            # deltas based on a censored revision must replace the
            # full content in one patch, so delta works everywhere
            header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
            delta = header + t
        else:
            ptext = revlog.revision(base, _df=fh, raw=True)
            delta = mdiff.textdiff(ptext, t)

        return delta

    def _builddeltainfo(self, revinfo, base, fh):
        """Build a full _deltainfo for ``revinfo`` delta'ed against ``base``."""
        # can we reuse the cached delta?
        delta = None
        if revinfo.cachedelta:
            cachebase, cachediff = revinfo.cachedelta
            # check if the diff still applies: walk down through empty
            # deltas to see whether the cached base collapses onto ours
            currentbase = cachebase
            while (currentbase != nullrev
                   and currentbase != base
                   and self.revlog.length(currentbase) == 0):
                currentbase = self.revlog.deltaparent(currentbase)
            if currentbase == base:
                delta = revinfo.cachedelta[1]
        if delta is None:
            delta = self._builddeltadiff(base, revinfo, fh)
        revlog = self.revlog
        header, data = revlog.compress(delta)
        deltalen = len(header) + len(data)
        chainbase = revlog.chainbase(base)
        offset = revlog.end(len(revlog) - 1)
        # distance from the start of the chain's data to the end of ours
        dist = deltalen + offset - revlog.start(chainbase)
        if revlog._generaldelta:
            deltabase = base
        else:
            deltabase = chainbase
        chainlen, compresseddeltalen = revlog._chaininfo(base)
        chainlen += 1
        compresseddeltalen += deltalen

        revlog = self.revlog
        snapshotdepth = None
        if deltabase == nullrev:
            snapshotdepth = 0
        elif revlog._sparserevlog and revlog.issnapshot(deltabase):
            # A delta chain should always be one full snapshot,
            # zero or more semi-snapshots, and zero or more deltas
            p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
            if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
                snapshotdepth = len(revlog._deltachain(deltabase)[0])

        return _deltainfo(dist, deltalen, (header, data), deltabase,
                          chainbase, chainlen, compresseddeltalen,
                          snapshotdepth)

    def _fullsnapshotinfo(self, fh, revinfo):
        """Build a _deltainfo storing the revision as a full snapshot."""
        curr = len(self.revlog)
        rawtext = self.buildtext(revinfo, fh)
        data = self.revlog.compress(rawtext)
        compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
        deltabase = chainbase = curr
        snapshotdepth = 0
        chainlen = 1

        return _deltainfo(dist, deltalen, data, deltabase,
                          chainbase, chainlen, compresseddeltalen,
                          snapshotdepth)

    def finddeltainfo(self, revinfo, fh):
        """Find an acceptable delta against a candidate revision

        revinfo: information about the revision (instance of _revisioninfo)
        fh: file handle to either the .i or the .d revlog file,
            depending on whether it is inlined or not

        Returns the first acceptable candidate revision, as ordered by
        _candidategroups

        If no suitable deltabase is found, we return delta info for a full
        snapshot.
        """
        if not revinfo.textlen:
            return self._fullsnapshotinfo(fh, revinfo)

        # no delta for flag processor revisions (see "candelta" for why)
        # not calling candelta since only one revision needs testing, also
        # to avoid the overhead of fetching flags again.
        if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
            return self._fullsnapshotinfo(fh, revinfo)

        cachedelta = revinfo.cachedelta
        p1 = revinfo.p1
        p2 = revinfo.p2
        revlog = self.revlog

        deltainfo = None
        p1r, p2r = revlog.rev(p1), revlog.rev(p2)
        groups = _candidategroups(self.revlog, revinfo.textlen,
                                  p1r, p2r, cachedelta)
        candidaterevs = next(groups)
        while candidaterevs is not None:
            nominateddeltas = []
            if deltainfo is not None:
                # if we already found a good delta,
                # challenge it against refined candidates
                nominateddeltas.append(deltainfo)
            for candidaterev in candidaterevs:
                candidatedelta = self._builddeltainfo(revinfo, candidaterev,
                                                      fh)
                if isgooddeltainfo(self.revlog, candidatedelta, revinfo):
                    nominateddeltas.append(candidatedelta)
            if nominateddeltas:
                deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
            if deltainfo is not None:
                candidaterevs = groups.send(deltainfo.base)
            else:
                candidaterevs = next(groups)

        if deltainfo is None:
            deltainfo = self._fullsnapshotinfo(fh, revinfo)
        return deltainfo
General Comments 0
You need to be logged in to leave comments. Login now