##// END OF EJS Templates
doctest: add an `issnapshot` method to _testrevlog...
Boris Feld -
r40677:85b14f0d default
parent child Browse files
Show More
@@ -1,895 +1,899 b''
1 # revlogdeltas.py - Logic around delta computation for revlog
1 # revlogdeltas.py - Logic around delta computation for revlog
2 #
2 #
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 # Copyright 2018 Octobus <contact@octobus.net>
4 # Copyright 2018 Octobus <contact@octobus.net>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8 """Helper class to compute deltas stored inside revlogs"""
8 """Helper class to compute deltas stored inside revlogs"""
9
9
10 from __future__ import absolute_import
10 from __future__ import absolute_import
11
11
12 import collections
12 import collections
13 import struct
13 import struct
14
14
15 # import stuff from node for others to import from revlog
15 # import stuff from node for others to import from revlog
16 from ..node import (
16 from ..node import (
17 nullrev,
17 nullrev,
18 )
18 )
19 from ..i18n import _
19 from ..i18n import _
20
20
21 from .constants import (
21 from .constants import (
22 REVIDX_ISCENSORED,
22 REVIDX_ISCENSORED,
23 REVIDX_RAWTEXT_CHANGING_FLAGS,
23 REVIDX_RAWTEXT_CHANGING_FLAGS,
24 )
24 )
25
25
26 from ..thirdparty import (
26 from ..thirdparty import (
27 attr,
27 attr,
28 )
28 )
29
29
30 from .. import (
30 from .. import (
31 error,
31 error,
32 mdiff,
32 mdiff,
33 )
33 )
34
34
35 # maximum <delta-chain-data>/<revision-text-length> ratio
35 # maximum <delta-chain-data>/<revision-text-length> ratio
36 LIMIT_DELTA2TEXT = 2
36 LIMIT_DELTA2TEXT = 2
37
37
38 class _testrevlog(object):
38 class _testrevlog(object):
39 """minimalist fake revlog to use in doctests"""
39 """minimalist fake revlog to use in doctests"""
40
40
41 def __init__(self, data, density=0.5, mingap=0):
41 def __init__(self, data, density=0.5, mingap=0, snapshot=()):
42 """data is an list of revision payload boundaries"""
42 """data is an list of revision payload boundaries"""
43 self._data = data
43 self._data = data
44 self._srdensitythreshold = density
44 self._srdensitythreshold = density
45 self._srmingapsize = mingap
45 self._srmingapsize = mingap
46 self._snapshot = set(snapshot)
46
47
47 def start(self, rev):
48 def start(self, rev):
48 if rev == 0:
49 if rev == 0:
49 return 0
50 return 0
50 return self._data[rev - 1]
51 return self._data[rev - 1]
51
52
52 def end(self, rev):
53 def end(self, rev):
53 return self._data[rev]
54 return self._data[rev]
54
55
55 def length(self, rev):
56 def length(self, rev):
56 return self.end(rev) - self.start(rev)
57 return self.end(rev) - self.start(rev)
57
58
58 def __len__(self):
59 def __len__(self):
59 return len(self._data)
60 return len(self._data)
60
61
62 def issnapshot(self, rev):
63 return rev in self._snapshot
64
def slicechunk(revlog, revs, targetsize=None):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `revlog._srdensitythreshold`. No gap smaller than
    `revlog._srmingapsize` is skipped.

    If `targetsize` is set, no chunk larger than `targetsize` will be yield.
    For consistency with other slicing choice, this limit won't go lower than
    `revlog._srmingapsize`.

    If individual revisions chunk are larger than this limit, they will still
    be raised individually.

    >>> revlog = _testrevlog([
    ...  5, #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ])

    >>> list(slicechunk(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(slicechunk(revlog, [0, 15]))
    [[0], [15]]
    >>> list(slicechunk(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]

    Slicing with a maximum chunk size
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
    [[0], [11], [13], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
    [[0], [11], [13, 15]]
    """
    density = revlog._srdensitythreshold
    mingap = revlog._srmingapsize
    if targetsize is not None:
        # never let the size cap drop below the minimum gap we skip
        targetsize = max(targetsize, mingap)
    # targetsize should not be specified when evaluating delta candidates:
    # * targetsize is used to ensure we stay within specification when reading,
    # first slice by density, then re-slice each group to honor targetsize
    for densitychunk in _slicechunktodensity(revlog, revs, density, mingap):
        for sizedchunk in _slicechunktosize(revlog, densitychunk, targetsize):
            yield sizedchunk
123
127
def _slicechunktosize(revlog, revs, targetsize=None):
    """slice revs to match the target size

    This is intended to be used on chunk that density slicing selected by that
    are still too large compared to the read garantee of revlog. This might
    happens when "minimal gap size" interrupted the slicing or when chain are
    built in a way that create large blocks next to each other.

    >>> revlog = _testrevlog([
    ...  3,  #0 (3)
    ...  5,  #1 (2)
    ...  6,  #2 (1)
    ...  8,  #3 (2)
    ...  8,  #4 (empty)
    ...  11, #5 (3)
    ...  12, #6 (1)
    ...  13, #7 (1)
    ...  14, #8 (1)
    ... ])

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1, 2], [3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]
    """
    assert targetsize is None or 0 <= targetsize
    # fast path: the whole group already fits within the target size
    if targetsize is None or segmentspan(revlog, revs) <= targetsize:
        yield revs
        return

    # Grow a window [chunkstart .. lastfit] of revisions whose on-disk span
    # stays within targetsize; whenever the next revision would overflow the
    # window, flush it (trimmed of trailing empty revs) and start a new one.
    chunkstart = 0
    windowbase = revlog.start(revs[0])
    lastfit = 0
    revwalk = enumerate(revs)
    next(revwalk)  # the first rev always belongs to the current window
    for idx, rev in revwalk:
        if revlog.end(rev) - windowbase <= targetsize:
            lastfit = idx
        else:
            piece = _trimchunk(revlog, revs, chunkstart, lastfit + 1)
            if piece:
                yield piece
            chunkstart = idx
            windowbase = revlog.start(rev)
            lastfit = idx
    # flush whatever remains in the final window
    yield _trimchunk(revlog, revs, chunkstart)
200
204
def _slicechunktodensity(revlog, revs, targetdensity=0.5,
                         mingapsize=0):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
    skipped.

    >>> revlog = _testrevlog([
    ...  5, #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ])

    >>> list(_slicechunktodensity(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(_slicechunktodensity(revlog, [0, 15]))
    [[0], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           mingapsize=20))
    [[1, 2, 3, 5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95))
    [[1, 2], [5], [8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95, mingapsize=12))
    [[1, 2], [5, 8, 10, 11], [14]]
    """
    start = revlog.start
    length = revlog.length

    # a single revision (or nothing) cannot be sliced further
    if len(revs) <= 1:
        yield revs
        return

    # total on-disk span from the first to the last revision of the group
    deltachainspan = segmentspan(revlog, revs)

    # a span below the minimum gap size is not worth splitting
    if deltachainspan < mingapsize:
        yield revs
        return

    readdata = deltachainspan
    # total useful payload within the span
    chainpayload = sum(length(r) for r in revs)

    if deltachainspan:
        density = chainpayload / float(deltachainspan)
    else:
        density = 1.0

    # already dense enough: read it in one go
    if density >= targetdensity:
        yield revs
        return

    # Collect the gaps (unread holes between consecutive non-empty
    # revisions), sorted so the largest ones can be popped first
    gaps = []
    prevend = None
    for i, rev in enumerate(revs):
        revstart = start(rev)
        revlen = length(rev)

        # Skip empty revisions to form larger holes
        if revlen == 0:
            continue

        if prevend is not None:
            gapsize = revstart - prevend
            # only consider holes that are large enough
            if gapsize > mingapsize:
                gaps.append((gapsize, i))

        prevend = revstart + revlen
    # sort the gaps to pop them from largest to small
    gaps.sort()

    # Collect the indices of the largest holes until the density is acceptable
    selected = []
    while gaps and density < targetdensity:
        gapsize, gapidx = gaps.pop()

        selected.append(gapidx)

        # excluding this gap from the read shrinks the amount of data we
        # fetch, which raises the effective density
        readdata -= gapsize
        if readdata > 0:
            density = chainpayload / float(readdata)
        else:
            density = 1.0
    selected.sort()

    # Cut the revs at collected indices
    previdx = 0
    for idx in selected:

        chunk = _trimchunk(revlog, revs, previdx, idx)
        if chunk:
            yield chunk

        previdx = idx

    chunk = _trimchunk(revlog, revs, previdx)
    if chunk:
        yield chunk
326
330
327 def _trimchunk(revlog, revs, startidx, endidx=None):
331 def _trimchunk(revlog, revs, startidx, endidx=None):
328 """returns revs[startidx:endidx] without empty trailing revs
332 """returns revs[startidx:endidx] without empty trailing revs
329
333
330 Doctest Setup
334 Doctest Setup
331 >>> revlog = _testrevlog([
335 >>> revlog = _testrevlog([
332 ... 5, #0
336 ... 5, #0
333 ... 10, #1
337 ... 10, #1
334 ... 12, #2
338 ... 12, #2
335 ... 12, #3 (empty)
339 ... 12, #3 (empty)
336 ... 17, #4
340 ... 17, #4
337 ... 21, #5
341 ... 21, #5
338 ... 21, #6 (empty)
342 ... 21, #6 (empty)
339 ... ])
343 ... ])
340
344
341 Contiguous cases:
345 Contiguous cases:
342 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
346 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
343 [0, 1, 2, 3, 4, 5]
347 [0, 1, 2, 3, 4, 5]
344 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
348 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
345 [0, 1, 2, 3, 4]
349 [0, 1, 2, 3, 4]
346 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
350 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
347 [0, 1, 2]
351 [0, 1, 2]
348 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
352 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
349 [2]
353 [2]
350 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
354 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
351 [3, 4, 5]
355 [3, 4, 5]
352 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
356 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
353 [3, 4]
357 [3, 4]
354
358
355 Discontiguous cases:
359 Discontiguous cases:
356 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
360 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
357 [1, 3, 5]
361 [1, 3, 5]
358 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
362 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
359 [1]
363 [1]
360 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
364 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
361 [3, 5]
365 [3, 5]
362 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
366 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
363 [3, 5]
367 [3, 5]
364 """
368 """
365 length = revlog.length
369 length = revlog.length
366
370
367 if endidx is None:
371 if endidx is None:
368 endidx = len(revs)
372 endidx = len(revs)
369
373
370 # If we have a non-emtpy delta candidate, there are nothing to trim
374 # If we have a non-emtpy delta candidate, there are nothing to trim
371 if revs[endidx - 1] < len(revlog):
375 if revs[endidx - 1] < len(revlog):
372 # Trim empty revs at the end, except the very first revision of a chain
376 # Trim empty revs at the end, except the very first revision of a chain
373 while (endidx > 1
377 while (endidx > 1
374 and endidx > startidx
378 and endidx > startidx
375 and length(revs[endidx - 1]) == 0):
379 and length(revs[endidx - 1]) == 0):
376 endidx -= 1
380 endidx -= 1
377
381
378 return revs[startidx:endidx]
382 return revs[startidx:endidx]
379
383
def segmentspan(revlog, revs):
    """Get the byte span of a segment of revisions

    revs is a sorted array of revision numbers

    >>> revlog = _testrevlog([
    ...  5,  #0
    ...  10, #1
    ...  12, #2
    ...  12, #3 (empty)
    ...  17, #4
    ... ])

    >>> segmentspan(revlog, [0, 1, 2, 3, 4])
    17
    >>> segmentspan(revlog, [0, 4])
    17
    >>> segmentspan(revlog, [3, 4])
    5
    >>> segmentspan(revlog, [1, 2, 3,])
    7
    >>> segmentspan(revlog, [1, 3])
    7
    """
    # an empty segment reads no bytes; otherwise the span runs from the
    # start of the first revision to the end of the last one
    if revs:
        return revlog.end(revs[-1]) - revlog.start(revs[0])
    return 0
408
412
def _textfromdelta(fh, revlog, baserev, delta, p1, p2, flags, expectednode):
    """build full text from a (base, delta) pair and other metadata"""
    # special case deltas which replace entire base; no need to decode
    # base revision. this neatly avoids censored bases, which throw when
    # they're decoded.
    headerlen = struct.calcsize(">lll")
    fullreplace = mdiff.replacediffheader(revlog.rawsize(baserev),
                                          len(delta) - headerlen)
    if delta[:headerlen] == fullreplace:
        fulltext = delta[headerlen:]
    else:
        # deltabase is rawtext before changed by flag processors, which is
        # equivalent to non-raw text
        basetext = revlog.revision(baserev, _df=fh, raw=False)
        fulltext = mdiff.patch(basetext, delta)

    try:
        fulltext, validatehash = revlog._processflags(fulltext, flags, 'read',
                                                      raw=True)
        if validatehash:
            revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
        # a censored flag on a revision that decoded cleanly is a mismatch
        if flags & REVIDX_ISCENSORED:
            raise error.StorageError(_('node %s is not censored') %
                                     expectednode)
    except error.CensoredNodeError:
        # must pass the censored index flag to add censored revisions
        if not flags & REVIDX_ISCENSORED:
            raise
    return fulltext
437
441
@attr.s(slots=True, frozen=True)
class _deltainfo(object):
    # Immutable record describing a candidate delta for storage, with the
    # chain statistics precomputed for ``isgooddeltainfo``.
    distance = attr.ib()             # distance from the base revision (bytes)
    deltalen = attr.ib()             # size of this delta
    data = attr.ib()                 # the delta payload itself
    base = attr.ib()                 # revision the delta applies against
    chainbase = attr.ib()            # first revision of the delta chain
    chainlen = attr.ib()             # number of deltas in the chain
    compresseddeltalen = attr.ib()   # total size of all deltas in the chain
    snapshotdepth = attr.ib()        # snapshot depth, or None if not a snapshot
448
452
def isgooddeltainfo(revlog, deltainfo, revinfo):
    """Returns True if the given delta is good. Good means that it is within
    the disk span, disk size, and chain length bounds that we know to be
    performant."""
    # no candidate at all is never good
    if deltainfo is None:
        return False

    # - 'deltainfo.distance' is the distance from the base revision --
    #   bounding it limits the amount of I/O we need to do.
    # - 'deltainfo.compresseddeltalen' is the sum of the total size of
    #   deltas we need to apply -- bounding it limits the amount of CPU
    #   we consume.

    textlen = revinfo.textlen
    # an unset (falsy) _maxdeltachainspan disables the span bound; using the
    # candidate's own distance guarantees the comparison below passes
    maxdist = revlog._maxdeltachainspan or deltainfo.distance
    maxdist = max(maxdist, textlen * 4)

    # Bad delta from read span:
    #
    # If the span of data read is larger than the maximum allowed.
    #
    # In the sparse-revlog case, we rely on the associated "sparse reading"
    # to avoid issue related to the span of data. In theory, it would be
    # possible to build pathological revlog where delta pattern would lead
    # to too many reads. However, they do not happen in practice at all. So
    # we skip the span check entirely.
    if not revlog._sparserevlog and maxdist < deltainfo.distance:
        return False

    # Bad delta from new delta size:
    #
    # A delta larger than the target text itself is never worth storing.
    if deltainfo.deltalen > textlen:
        return False

    # Bad delta from cumulated payload size:
    #
    # The sum of the chain's deltas must stay below K * target text length.
    if deltainfo.compresseddeltalen > textlen * LIMIT_DELTA2TEXT:
        return False

    # Bad delta from chain length:
    #
    # Too many deltas in the chain (zero/unset disables the bound).
    if revlog._maxchainlen and deltainfo.chainlen > revlog._maxchainlen:
        return False

    # bad delta from intermediate snapshot size limit
    #
    # Each intermediate snapshot is capped at textlen / 2**depth; the limit
    # exists to prevent endless chains of intermediate deltas.
    depth = deltainfo.snapshotdepth
    if depth is not None and (textlen >> depth) < deltainfo.deltalen:
        return False

    # bad delta if the new intermediate snapshot would be larger than the
    # previous snapshot it builds upon
    if depth and revlog.length(deltainfo.base) < deltainfo.deltalen:
        return False

    return True
517
521
518 def _candidategroups(revlog, textlen, p1, p2, cachedelta):
522 def _candidategroups(revlog, textlen, p1, p2, cachedelta):
519 """Provides group of revision to be tested as delta base
523 """Provides group of revision to be tested as delta base
520
524
521 This top level function focus on emitting groups with unique and worthwhile
525 This top level function focus on emitting groups with unique and worthwhile
522 content. See _raw_candidate_groups for details about the group order.
526 content. See _raw_candidate_groups for details about the group order.
523 """
527 """
524 # should we try to build a delta?
528 # should we try to build a delta?
525 if not (len(revlog) and revlog._storedeltachains):
529 if not (len(revlog) and revlog._storedeltachains):
526 yield None
530 yield None
527 return
531 return
528
532
529 deltalength = revlog.length
533 deltalength = revlog.length
530 deltaparent = revlog.deltaparent
534 deltaparent = revlog.deltaparent
531 good = None
535 good = None
532
536
533 deltas_limit = textlen * LIMIT_DELTA2TEXT
537 deltas_limit = textlen * LIMIT_DELTA2TEXT
534
538
535 tested = set([nullrev])
539 tested = set([nullrev])
536 candidates = _refinedgroups(revlog, p1, p2, cachedelta)
540 candidates = _refinedgroups(revlog, p1, p2, cachedelta)
537 while True:
541 while True:
538 temptative = candidates.send(good)
542 temptative = candidates.send(good)
539 if temptative is None:
543 if temptative is None:
540 break
544 break
541 group = []
545 group = []
542 for rev in temptative:
546 for rev in temptative:
543 # skip over empty delta (no need to include them in a chain)
547 # skip over empty delta (no need to include them in a chain)
544 while (revlog._generaldelta
548 while (revlog._generaldelta
545 and not (rev == nullrev
549 and not (rev == nullrev
546 or rev in tested
550 or rev in tested
547 or deltalength(rev))):
551 or deltalength(rev))):
548 tested.add(rev)
552 tested.add(rev)
549 rev = deltaparent(rev)
553 rev = deltaparent(rev)
550 # filter out revision we tested already
554 # filter out revision we tested already
551 if rev in tested:
555 if rev in tested:
552 continue
556 continue
553 tested.add(rev)
557 tested.add(rev)
554 # filter out delta base that will never produce good delta
558 # filter out delta base that will never produce good delta
555 if deltas_limit < revlog.length(rev):
559 if deltas_limit < revlog.length(rev):
556 continue
560 continue
557 # no need to try a delta against nullrev, this will be done as a
561 # no need to try a delta against nullrev, this will be done as a
558 # last resort.
562 # last resort.
559 if rev == nullrev:
563 if rev == nullrev:
560 continue
564 continue
561 # no delta for rawtext-changing revs (see "candelta" for why)
565 # no delta for rawtext-changing revs (see "candelta" for why)
562 if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
566 if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
563 continue
567 continue
564 group.append(rev)
568 group.append(rev)
565 if group:
569 if group:
566 # XXX: in the sparse revlog case, group can become large,
570 # XXX: in the sparse revlog case, group can become large,
567 # impacting performances. Some bounding or slicing mecanism
571 # impacting performances. Some bounding or slicing mecanism
568 # would help to reduce this impact.
572 # would help to reduce this impact.
569 good = yield tuple(group)
573 good = yield tuple(group)
570 yield None
574 yield None
571
575
572 def _findsnapshots(revlog, cache, start_rev):
576 def _findsnapshots(revlog, cache, start_rev):
573 """find snapshot from start_rev to tip"""
577 """find snapshot from start_rev to tip"""
574 deltaparent = revlog.deltaparent
578 deltaparent = revlog.deltaparent
575 issnapshot = revlog.issnapshot
579 issnapshot = revlog.issnapshot
576 for rev in revlog.revs(start_rev):
580 for rev in revlog.revs(start_rev):
577 if issnapshot(rev):
581 if issnapshot(rev):
578 cache[deltaparent(rev)].append(rev)
582 cache[deltaparent(rev)].append(rev)
579
583
def _refinedgroups(revlog, p1, p2, cachedelta):
    """Generate refined groups of candidate delta bases.

    Coroutine-style generator: yields tuples of candidate base revisions and
    receives (via ``send``) the revision the caller found "good", or None.
    Terminates by yielding None.
    """
    good = None
    # First we try to reuse the delta contained in the bundle.
    # (or from the source revlog)
    #
    # This logic only applies to general delta repositories and can be disabled
    # through configuration. Disabling reuse source delta is useful when
    # we want to make sure we recomputed "optimal" deltas.
    if cachedelta and revlog._generaldelta and revlog._lazydeltabase:
        # Assume what we received from the server is a good choice
        # build delta will reuse the cache
        good = yield (cachedelta[0],)
        if good is not None:
            yield None
            return
    for candidates in _rawgroups(revlog, p1, p2, cachedelta):
        good = yield candidates
        if good is not None:
            break

    # If sparse revlog is enabled, we can try to refine the available deltas
    if not revlog._sparserevlog:
        yield None
        return

    # if we have a refinable value, try to refine it
    if good is not None and good not in (p1, p2) and revlog.issnapshot(good):
        # refine snapshot down: walk the delta-parent chain toward the root,
        # offering each ancestor snapshot as a (possibly better) base
        previous = None
        while previous != good:
            previous = good
            base = revlog.deltaparent(good)
            if base == nullrev:
                break
            good = yield (base,)
        # refine snapshot up: offer the snapshots built on top of `good`
        #
        # XXX the _findsnapshots call can be expensive and is "duplicated" with
        # the one done in `_rawgroups`. Once we start working on performance,
        # we should make the two logics share this computation.
        snapshots = collections.defaultdict(list)
        _findsnapshots(revlog, snapshots, good + 1)
        previous = None
        while good != previous:
            previous = good
            children = tuple(sorted(c for c in snapshots[good]))
            good = yield children

    # we have found nothing
    yield None
630
634
def _rawgroups(revlog, p1, p2, cachedelta):
    """Provides groups of revisions to be tested as delta base.

    This lower level function focuses on emitting deltas theoretically
    interesting, without looking at any practical details.

    The group order aims at providing fast or small candidates first.
    """
    gdelta = revlog._generaldelta
    sparse = revlog._sparserevlog
    curr = len(revlog)
    prev = curr - 1
    deltachain = lambda rev: revlog._deltachain(rev)[0]

    if gdelta:
        # exclude already lazy tested base if any
        parents = [p for p in (p1, p2) if p != nullrev]

        if not revlog._deltabothparents and len(parents) == 2:
            parents.sort()
            # To minimize the chance of having to build a fulltext,
            # pick first whichever parent is closest to us (max rev)
            yield (parents[1],)
            # then the other one (min rev) if the first did not fit
            yield (parents[0],)
        elif len(parents) > 0:
            # Test all parents (1 or 2), and keep the best candidate
            yield parents

    # NOTE(review): `parents` is only bound inside the `if gdelta:` branch
    # above; this assumes a sparse revlog always implies general delta —
    # confirm against the revlog configuration invariants.
    if sparse and parents:
        snapshots = collections.defaultdict(list) # map: base-rev: snapshot-rev
        # See if we can use an existing snapshot in the parent chains to use as
        # a base for a new intermediate-snapshot
        #
        # search for snapshot in parents delta chain
        # map: snapshot-level: snapshot-rev
        parents_snaps = collections.defaultdict(set)
        candidate_chains = [deltachain(p) for p in parents]
        for chain in candidate_chains:
            for idx, s in enumerate(chain):
                if not revlog.issnapshot(s):
                    break
                parents_snaps[idx].add(s)
        snapfloor = min(parents_snaps[0]) + 1
        _findsnapshots(revlog, snapshots, snapfloor)
        # search for the highest "unrelated" revision
        #
        # Adding snapshots used by "unrelated" revision increase the odd we
        # reuse an independent, yet better snapshot chain.
        #
        # XXX instead of building a set of revisions, we could lazily enumerate
        # over the chains. That would be more efficient, however we stick to
        # simple code for now.
        all_revs = set()
        for chain in candidate_chains:
            all_revs.update(chain)
        other = None
        for r in revlog.revs(prev, snapfloor):
            if r not in all_revs:
                other = r
                break
        if other is not None:
            # To avoid unfair competition, we won't use unrelated intermediate
            # snapshot that are deeper than the ones from the parent delta
            # chain.
            max_depth = max(parents_snaps.keys())
            chain = deltachain(other)
            for idx, s in enumerate(chain):
                if s < snapfloor:
                    continue
                if max_depth < idx:
                    break
                if not revlog.issnapshot(s):
                    break
                parents_snaps[idx].add(s)
        # Test them as possible intermediate snapshot base
        # We test them from highest to lowest level. High level one are more
        # likely to result in small delta
        floor = None
        for idx, snaps in sorted(parents_snaps.items(), reverse=True):
            siblings = set()
            for s in snaps:
                siblings.update(snapshots[s])
            # Before considering making a new intermediate snapshot, we check
            # if an existing snapshot, children of base we consider, would be
            # suitable.
            #
            # It gives a chance to reuse a delta chain "unrelated" to the
            # current revision instead of starting our own. Without such
            # re-use, topological branches would keep reopening new chains.
            # Creating more and more snapshot as the repository grow.

            if floor is not None:
                # We only do this for siblings created after the one in our
                # parent's delta chain. Those created before have less chances
                # to be valid base since our ancestors had to create a new
                # snapshot.
                siblings = [r for r in siblings if floor < r]
            yield tuple(sorted(siblings))
            # then test the base from our parent's delta chain.
            yield tuple(sorted(snaps))
            floor = min(snaps)
        # No suitable base found in the parent chain, search if any full
        # snapshots emitted since parent's base would be a suitable base for an
        # intermediate snapshot.
        #
        # It gives a chance to reuse a delta chain unrelated to the current
        # revisions instead of starting our own. Without such re-use,
        # topological branches would keep reopening new full chains. Creating
        # more and more snapshot as the repository grow.
        yield tuple(snapshots[nullrev])

    if not sparse:
        # other approach failed, try against prev to hopefully save us a
        # fulltext.
        yield (prev,)
747
751
class deltacomputer(object):
    """Compute and select deltas for revisions added to a revlog."""

    def __init__(self, revlog):
        self.revlog = revlog

    def buildtext(self, revinfo, fh):
        """Builds a fulltext version of a revision

        revinfo: _revisioninfo instance that contains all needed info
        fh:      file handle to either the .i or the .d revlog file,
                 depending on whether it is inlined or not

        The result is memoized in ``revinfo.btext[0]``.
        """
        btext = revinfo.btext
        if btext[0] is not None:
            return btext[0]

        revlog = self.revlog
        cachedelta = revinfo.cachedelta
        baserev = cachedelta[0]
        delta = cachedelta[1]

        fulltext = btext[0] = _textfromdelta(fh, revlog, baserev, delta,
                                             revinfo.p1, revinfo.p2,
                                             revinfo.flags, revinfo.node)
        return fulltext

    def _builddeltadiff(self, base, revinfo, fh):
        """Compute the raw delta of ``revinfo`` against revision ``base``."""
        revlog = self.revlog
        t = self.buildtext(revinfo, fh)
        if revlog.iscensored(base):
            # deltas based on a censored revision must replace the
            # full content in one patch, so delta works everywhere
            header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
            delta = header + t
        else:
            ptext = revlog.revision(base, _df=fh, raw=True)
            delta = mdiff.textdiff(ptext, t)

        return delta

    def _builddeltainfo(self, revinfo, base, fh):
        """Build a _deltainfo describing a delta of ``revinfo`` against
        revision ``base``, reusing the cached delta when it still applies."""
        # can we use the cached delta?
        delta = None
        if revinfo.cachedelta:
            cachebase, cachediff = revinfo.cachedelta
            # check if the diff still applies: walk empty deltas from the
            # cached base toward `base`
            currentbase = cachebase
            while (currentbase != nullrev
                   and currentbase != base
                   and self.revlog.length(currentbase) == 0):
                currentbase = self.revlog.deltaparent(currentbase)
            if currentbase == base:
                delta = revinfo.cachedelta[1]
        if delta is None:
            delta = self._builddeltadiff(base, revinfo, fh)
        revlog = self.revlog
        header, data = revlog.compress(delta)
        deltalen = len(header) + len(data)
        chainbase = revlog.chainbase(base)
        offset = revlog.end(len(revlog) - 1)
        dist = deltalen + offset - revlog.start(chainbase)
        if revlog._generaldelta:
            deltabase = base
        else:
            deltabase = chainbase
        chainlen, compresseddeltalen = revlog._chaininfo(base)
        chainlen += 1
        compresseddeltalen += deltalen

        # (a second `revlog = self.revlog` rebinding was removed here; the
        # local is already bound above)
        snapshotdepth = None
        if deltabase == nullrev:
            snapshotdepth = 0
        elif revlog._sparserevlog and revlog.issnapshot(deltabase):
            # A delta chain should always be one full snapshot,
            # zero or more semi-snapshots, and zero or more deltas
            p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
            # `issnapshot(deltabase)` already holds from the elif guard, so
            # only the parent check is needed here
            if deltabase not in (p1, p2):
                snapshotdepth = len(revlog._deltachain(deltabase)[0])

        return _deltainfo(dist, deltalen, (header, data), deltabase,
                          chainbase, chainlen, compresseddeltalen,
                          snapshotdepth)

    def _fullsnapshotinfo(self, fh, revinfo):
        """Build a _deltainfo storing ``revinfo`` as a full snapshot."""
        curr = len(self.revlog)
        rawtext = self.buildtext(revinfo, fh)
        data = self.revlog.compress(rawtext)
        compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
        deltabase = chainbase = curr
        snapshotdepth = 0
        chainlen = 1

        return _deltainfo(dist, deltalen, data, deltabase,
                          chainbase, chainlen, compresseddeltalen,
                          snapshotdepth)

    def finddeltainfo(self, revinfo, fh):
        """Find an acceptable delta against a candidate revision

        revinfo: information about the revision (instance of _revisioninfo)
        fh:      file handle to either the .i or the .d revlog file,
                 depending on whether it is inlined or not

        Returns the first acceptable candidate revision, as ordered by
        _candidategroups

        If no suitable deltabase is found, we return delta info for a full
        snapshot.
        """
        if not revinfo.textlen:
            return self._fullsnapshotinfo(fh, revinfo)

        # no delta for flag processor revision (see "candelta" for why)
        # not calling candelta since only one revision needs test, also to
        # avoid overhead fetching flags again.
        if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
            return self._fullsnapshotinfo(fh, revinfo)

        cachedelta = revinfo.cachedelta
        p1 = revinfo.p1
        p2 = revinfo.p2
        revlog = self.revlog

        deltainfo = None
        p1r, p2r = revlog.rev(p1), revlog.rev(p2)
        groups = _candidategroups(self.revlog, revinfo.textlen,
                                  p1r, p2r, cachedelta)
        candidaterevs = next(groups)
        while candidaterevs is not None:
            nominateddeltas = []
            if deltainfo is not None:
                # if we already found a good delta,
                # challenge it against refined candidates
                nominateddeltas.append(deltainfo)
            for candidaterev in candidaterevs:
                candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
                if isgooddeltainfo(self.revlog, candidatedelta, revinfo):
                    nominateddeltas.append(candidatedelta)
            if nominateddeltas:
                deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
            if deltainfo is not None:
                # feed the chosen base back so the candidate generator can
                # refine the search around it
                candidaterevs = groups.send(deltainfo.base)
            else:
                candidaterevs = next(groups)

        if deltainfo is None:
            deltainfo = self._fullsnapshotinfo(fh, revinfo)
        return deltainfo
General Comments 0
You need to be logged in to leave comments. Login now