##// END OF EJS Templates
flagprocessors: use _processflagsraw in easy cases...
marmoute -
r43147:1ebf1a1e default
parent child Browse files
Show More
@@ -1,1048 +1,1047 b''
1 # revlogdeltas.py - Logic around delta computation for revlog
1 # revlogdeltas.py - Logic around delta computation for revlog
2 #
2 #
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 # Copyright 2018 Octobus <contact@octobus.net>
4 # Copyright 2018 Octobus <contact@octobus.net>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8 """Helper class to compute deltas stored inside revlogs"""
8 """Helper class to compute deltas stored inside revlogs"""
9
9
10 from __future__ import absolute_import
10 from __future__ import absolute_import
11
11
12 import collections
12 import collections
13 import struct
13 import struct
14
14
15 # import stuff from node for others to import from revlog
15 # import stuff from node for others to import from revlog
16 from ..node import (
16 from ..node import (
17 nullrev,
17 nullrev,
18 )
18 )
19 from ..i18n import _
19 from ..i18n import _
20
20
21 from .constants import (
21 from .constants import (
22 REVIDX_ISCENSORED,
22 REVIDX_ISCENSORED,
23 REVIDX_RAWTEXT_CHANGING_FLAGS,
23 REVIDX_RAWTEXT_CHANGING_FLAGS,
24 )
24 )
25
25
26 from ..thirdparty import (
26 from ..thirdparty import (
27 attr,
27 attr,
28 )
28 )
29
29
30 from .. import (
30 from .. import (
31 error,
31 error,
32 mdiff,
32 mdiff,
33 util,
33 util,
34 )
34 )
35
35
36 # maximum <delta-chain-data>/<revision-text-length> ratio
36 # maximum <delta-chain-data>/<revision-text-length> ratio
37 LIMIT_DELTA2TEXT = 2
37 LIMIT_DELTA2TEXT = 2
38
38
39 class _testrevlog(object):
39 class _testrevlog(object):
40 """minimalist fake revlog to use in doctests"""
40 """minimalist fake revlog to use in doctests"""
41
41
42 def __init__(self, data, density=0.5, mingap=0, snapshot=()):
42 def __init__(self, data, density=0.5, mingap=0, snapshot=()):
43 """data is an list of revision payload boundaries"""
43 """data is an list of revision payload boundaries"""
44 self._data = data
44 self._data = data
45 self._srdensitythreshold = density
45 self._srdensitythreshold = density
46 self._srmingapsize = mingap
46 self._srmingapsize = mingap
47 self._snapshot = set(snapshot)
47 self._snapshot = set(snapshot)
48 self.index = None
48 self.index = None
49
49
50 def start(self, rev):
50 def start(self, rev):
51 if rev == nullrev:
51 if rev == nullrev:
52 return 0
52 return 0
53 if rev == 0:
53 if rev == 0:
54 return 0
54 return 0
55 return self._data[rev - 1]
55 return self._data[rev - 1]
56
56
57 def end(self, rev):
57 def end(self, rev):
58 if rev == nullrev:
58 if rev == nullrev:
59 return 0
59 return 0
60 return self._data[rev]
60 return self._data[rev]
61
61
62 def length(self, rev):
62 def length(self, rev):
63 return self.end(rev) - self.start(rev)
63 return self.end(rev) - self.start(rev)
64
64
65 def __len__(self):
65 def __len__(self):
66 return len(self._data)
66 return len(self._data)
67
67
68 def issnapshot(self, rev):
68 def issnapshot(self, rev):
69 if rev == nullrev:
69 if rev == nullrev:
70 return True
70 return True
71 return rev in self._snapshot
71 return rev in self._snapshot
72
72
73 def slicechunk(revlog, revs, targetsize=None):
73 def slicechunk(revlog, revs, targetsize=None):
74 """slice revs to reduce the amount of unrelated data to be read from disk.
74 """slice revs to reduce the amount of unrelated data to be read from disk.
75
75
76 ``revs`` is sliced into groups that should be read in one time.
76 ``revs`` is sliced into groups that should be read in one time.
77 Assume that revs are sorted.
77 Assume that revs are sorted.
78
78
79 The initial chunk is sliced until the overall density (payload/chunks-span
79 The initial chunk is sliced until the overall density (payload/chunks-span
80 ratio) is above `revlog._srdensitythreshold`. No gap smaller than
80 ratio) is above `revlog._srdensitythreshold`. No gap smaller than
81 `revlog._srmingapsize` is skipped.
81 `revlog._srmingapsize` is skipped.
82
82
83 If `targetsize` is set, no chunk larger than `targetsize` will be yield.
83 If `targetsize` is set, no chunk larger than `targetsize` will be yield.
84 For consistency with other slicing choice, this limit won't go lower than
84 For consistency with other slicing choice, this limit won't go lower than
85 `revlog._srmingapsize`.
85 `revlog._srmingapsize`.
86
86
87 If individual revisions chunk are larger than this limit, they will still
87 If individual revisions chunk are larger than this limit, they will still
88 be raised individually.
88 be raised individually.
89
89
90 >>> data = [
90 >>> data = [
91 ... 5, #00 (5)
91 ... 5, #00 (5)
92 ... 10, #01 (5)
92 ... 10, #01 (5)
93 ... 12, #02 (2)
93 ... 12, #02 (2)
94 ... 12, #03 (empty)
94 ... 12, #03 (empty)
95 ... 27, #04 (15)
95 ... 27, #04 (15)
96 ... 31, #05 (4)
96 ... 31, #05 (4)
97 ... 31, #06 (empty)
97 ... 31, #06 (empty)
98 ... 42, #07 (11)
98 ... 42, #07 (11)
99 ... 47, #08 (5)
99 ... 47, #08 (5)
100 ... 47, #09 (empty)
100 ... 47, #09 (empty)
101 ... 48, #10 (1)
101 ... 48, #10 (1)
102 ... 51, #11 (3)
102 ... 51, #11 (3)
103 ... 74, #12 (23)
103 ... 74, #12 (23)
104 ... 85, #13 (11)
104 ... 85, #13 (11)
105 ... 86, #14 (1)
105 ... 86, #14 (1)
106 ... 91, #15 (5)
106 ... 91, #15 (5)
107 ... ]
107 ... ]
108 >>> revlog = _testrevlog(data, snapshot=range(16))
108 >>> revlog = _testrevlog(data, snapshot=range(16))
109
109
110 >>> list(slicechunk(revlog, list(range(16))))
110 >>> list(slicechunk(revlog, list(range(16))))
111 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
111 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
112 >>> list(slicechunk(revlog, [0, 15]))
112 >>> list(slicechunk(revlog, [0, 15]))
113 [[0], [15]]
113 [[0], [15]]
114 >>> list(slicechunk(revlog, [0, 11, 15]))
114 >>> list(slicechunk(revlog, [0, 11, 15]))
115 [[0], [11], [15]]
115 [[0], [11], [15]]
116 >>> list(slicechunk(revlog, [0, 11, 13, 15]))
116 >>> list(slicechunk(revlog, [0, 11, 13, 15]))
117 [[0], [11, 13, 15]]
117 [[0], [11, 13, 15]]
118 >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
118 >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
119 [[1, 2], [5, 8, 10, 11], [14]]
119 [[1, 2], [5, 8, 10, 11], [14]]
120
120
121 Slicing with a maximum chunk size
121 Slicing with a maximum chunk size
122 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
122 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
123 [[0], [11], [13], [15]]
123 [[0], [11], [13], [15]]
124 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
124 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
125 [[0], [11], [13, 15]]
125 [[0], [11], [13, 15]]
126
126
127 Slicing involving nullrev
127 Slicing involving nullrev
128 >>> list(slicechunk(revlog, [-1, 0, 11, 13, 15], targetsize=20))
128 >>> list(slicechunk(revlog, [-1, 0, 11, 13, 15], targetsize=20))
129 [[-1, 0], [11], [13, 15]]
129 [[-1, 0], [11], [13, 15]]
130 >>> list(slicechunk(revlog, [-1, 13, 15], targetsize=5))
130 >>> list(slicechunk(revlog, [-1, 13, 15], targetsize=5))
131 [[-1], [13], [15]]
131 [[-1], [13], [15]]
132 """
132 """
133 if targetsize is not None:
133 if targetsize is not None:
134 targetsize = max(targetsize, revlog._srmingapsize)
134 targetsize = max(targetsize, revlog._srmingapsize)
135 # targetsize should not be specified when evaluating delta candidates:
135 # targetsize should not be specified when evaluating delta candidates:
136 # * targetsize is used to ensure we stay within specification when reading,
136 # * targetsize is used to ensure we stay within specification when reading,
137 densityslicing = getattr(revlog.index, 'slicechunktodensity', None)
137 densityslicing = getattr(revlog.index, 'slicechunktodensity', None)
138 if densityslicing is None:
138 if densityslicing is None:
139 densityslicing = lambda x, y, z: _slicechunktodensity(revlog, x, y, z)
139 densityslicing = lambda x, y, z: _slicechunktodensity(revlog, x, y, z)
140 for chunk in densityslicing(revs,
140 for chunk in densityslicing(revs,
141 revlog._srdensitythreshold,
141 revlog._srdensitythreshold,
142 revlog._srmingapsize):
142 revlog._srmingapsize):
143 for subchunk in _slicechunktosize(revlog, chunk, targetsize):
143 for subchunk in _slicechunktosize(revlog, chunk, targetsize):
144 yield subchunk
144 yield subchunk
145
145
146 def _slicechunktosize(revlog, revs, targetsize=None):
146 def _slicechunktosize(revlog, revs, targetsize=None):
147 """slice revs to match the target size
147 """slice revs to match the target size
148
148
149 This is intended to be used on chunk that density slicing selected by that
149 This is intended to be used on chunk that density slicing selected by that
150 are still too large compared to the read garantee of revlog. This might
150 are still too large compared to the read garantee of revlog. This might
151 happens when "minimal gap size" interrupted the slicing or when chain are
151 happens when "minimal gap size" interrupted the slicing or when chain are
152 built in a way that create large blocks next to each other.
152 built in a way that create large blocks next to each other.
153
153
154 >>> data = [
154 >>> data = [
155 ... 3, #0 (3)
155 ... 3, #0 (3)
156 ... 5, #1 (2)
156 ... 5, #1 (2)
157 ... 6, #2 (1)
157 ... 6, #2 (1)
158 ... 8, #3 (2)
158 ... 8, #3 (2)
159 ... 8, #4 (empty)
159 ... 8, #4 (empty)
160 ... 11, #5 (3)
160 ... 11, #5 (3)
161 ... 12, #6 (1)
161 ... 12, #6 (1)
162 ... 13, #7 (1)
162 ... 13, #7 (1)
163 ... 14, #8 (1)
163 ... 14, #8 (1)
164 ... ]
164 ... ]
165
165
166 == All snapshots cases ==
166 == All snapshots cases ==
167 >>> revlog = _testrevlog(data, snapshot=range(9))
167 >>> revlog = _testrevlog(data, snapshot=range(9))
168
168
169 Cases where chunk is already small enough
169 Cases where chunk is already small enough
170 >>> list(_slicechunktosize(revlog, [0], 3))
170 >>> list(_slicechunktosize(revlog, [0], 3))
171 [[0]]
171 [[0]]
172 >>> list(_slicechunktosize(revlog, [6, 7], 3))
172 >>> list(_slicechunktosize(revlog, [6, 7], 3))
173 [[6, 7]]
173 [[6, 7]]
174 >>> list(_slicechunktosize(revlog, [0], None))
174 >>> list(_slicechunktosize(revlog, [0], None))
175 [[0]]
175 [[0]]
176 >>> list(_slicechunktosize(revlog, [6, 7], None))
176 >>> list(_slicechunktosize(revlog, [6, 7], None))
177 [[6, 7]]
177 [[6, 7]]
178
178
179 cases where we need actual slicing
179 cases where we need actual slicing
180 >>> list(_slicechunktosize(revlog, [0, 1], 3))
180 >>> list(_slicechunktosize(revlog, [0, 1], 3))
181 [[0], [1]]
181 [[0], [1]]
182 >>> list(_slicechunktosize(revlog, [1, 3], 3))
182 >>> list(_slicechunktosize(revlog, [1, 3], 3))
183 [[1], [3]]
183 [[1], [3]]
184 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
184 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
185 [[1, 2], [3]]
185 [[1, 2], [3]]
186 >>> list(_slicechunktosize(revlog, [3, 5], 3))
186 >>> list(_slicechunktosize(revlog, [3, 5], 3))
187 [[3], [5]]
187 [[3], [5]]
188 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
188 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
189 [[3], [5]]
189 [[3], [5]]
190 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
190 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
191 [[5], [6, 7, 8]]
191 [[5], [6, 7, 8]]
192 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
192 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
193 [[0], [1, 2], [3], [5], [6, 7, 8]]
193 [[0], [1, 2], [3], [5], [6, 7, 8]]
194
194
195 Case with too large individual chunk (must return valid chunk)
195 Case with too large individual chunk (must return valid chunk)
196 >>> list(_slicechunktosize(revlog, [0, 1], 2))
196 >>> list(_slicechunktosize(revlog, [0, 1], 2))
197 [[0], [1]]
197 [[0], [1]]
198 >>> list(_slicechunktosize(revlog, [1, 3], 1))
198 >>> list(_slicechunktosize(revlog, [1, 3], 1))
199 [[1], [3]]
199 [[1], [3]]
200 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
200 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
201 [[3], [5]]
201 [[3], [5]]
202
202
203 == No Snapshot cases ==
203 == No Snapshot cases ==
204 >>> revlog = _testrevlog(data)
204 >>> revlog = _testrevlog(data)
205
205
206 Cases where chunk is already small enough
206 Cases where chunk is already small enough
207 >>> list(_slicechunktosize(revlog, [0], 3))
207 >>> list(_slicechunktosize(revlog, [0], 3))
208 [[0]]
208 [[0]]
209 >>> list(_slicechunktosize(revlog, [6, 7], 3))
209 >>> list(_slicechunktosize(revlog, [6, 7], 3))
210 [[6, 7]]
210 [[6, 7]]
211 >>> list(_slicechunktosize(revlog, [0], None))
211 >>> list(_slicechunktosize(revlog, [0], None))
212 [[0]]
212 [[0]]
213 >>> list(_slicechunktosize(revlog, [6, 7], None))
213 >>> list(_slicechunktosize(revlog, [6, 7], None))
214 [[6, 7]]
214 [[6, 7]]
215
215
216 cases where we need actual slicing
216 cases where we need actual slicing
217 >>> list(_slicechunktosize(revlog, [0, 1], 3))
217 >>> list(_slicechunktosize(revlog, [0, 1], 3))
218 [[0], [1]]
218 [[0], [1]]
219 >>> list(_slicechunktosize(revlog, [1, 3], 3))
219 >>> list(_slicechunktosize(revlog, [1, 3], 3))
220 [[1], [3]]
220 [[1], [3]]
221 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
221 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
222 [[1], [2, 3]]
222 [[1], [2, 3]]
223 >>> list(_slicechunktosize(revlog, [3, 5], 3))
223 >>> list(_slicechunktosize(revlog, [3, 5], 3))
224 [[3], [5]]
224 [[3], [5]]
225 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
225 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
226 [[3], [4, 5]]
226 [[3], [4, 5]]
227 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
227 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
228 [[5], [6, 7, 8]]
228 [[5], [6, 7, 8]]
229 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
229 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
230 [[0], [1, 2], [3], [5], [6, 7, 8]]
230 [[0], [1, 2], [3], [5], [6, 7, 8]]
231
231
232 Case with too large individual chunk (must return valid chunk)
232 Case with too large individual chunk (must return valid chunk)
233 >>> list(_slicechunktosize(revlog, [0, 1], 2))
233 >>> list(_slicechunktosize(revlog, [0, 1], 2))
234 [[0], [1]]
234 [[0], [1]]
235 >>> list(_slicechunktosize(revlog, [1, 3], 1))
235 >>> list(_slicechunktosize(revlog, [1, 3], 1))
236 [[1], [3]]
236 [[1], [3]]
237 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
237 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
238 [[3], [5]]
238 [[3], [5]]
239
239
240 == mixed case ==
240 == mixed case ==
241 >>> revlog = _testrevlog(data, snapshot=[0, 1, 2])
241 >>> revlog = _testrevlog(data, snapshot=[0, 1, 2])
242 >>> list(_slicechunktosize(revlog, list(range(9)), 5))
242 >>> list(_slicechunktosize(revlog, list(range(9)), 5))
243 [[0, 1], [2], [3, 4, 5], [6, 7, 8]]
243 [[0, 1], [2], [3, 4, 5], [6, 7, 8]]
244 """
244 """
245 assert targetsize is None or 0 <= targetsize
245 assert targetsize is None or 0 <= targetsize
246 startdata = revlog.start(revs[0])
246 startdata = revlog.start(revs[0])
247 enddata = revlog.end(revs[-1])
247 enddata = revlog.end(revs[-1])
248 fullspan = enddata - startdata
248 fullspan = enddata - startdata
249 if targetsize is None or fullspan <= targetsize:
249 if targetsize is None or fullspan <= targetsize:
250 yield revs
250 yield revs
251 return
251 return
252
252
253 startrevidx = 0
253 startrevidx = 0
254 endrevidx = 1
254 endrevidx = 1
255 iterrevs = enumerate(revs)
255 iterrevs = enumerate(revs)
256 next(iterrevs) # skip first rev.
256 next(iterrevs) # skip first rev.
257 # first step: get snapshots out of the way
257 # first step: get snapshots out of the way
258 for idx, r in iterrevs:
258 for idx, r in iterrevs:
259 span = revlog.end(r) - startdata
259 span = revlog.end(r) - startdata
260 snapshot = revlog.issnapshot(r)
260 snapshot = revlog.issnapshot(r)
261 if span <= targetsize and snapshot:
261 if span <= targetsize and snapshot:
262 endrevidx = idx + 1
262 endrevidx = idx + 1
263 else:
263 else:
264 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
264 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
265 if chunk:
265 if chunk:
266 yield chunk
266 yield chunk
267 startrevidx = idx
267 startrevidx = idx
268 startdata = revlog.start(r)
268 startdata = revlog.start(r)
269 endrevidx = idx + 1
269 endrevidx = idx + 1
270 if not snapshot:
270 if not snapshot:
271 break
271 break
272
272
273 # for the others, we use binary slicing to quickly converge toward valid
273 # for the others, we use binary slicing to quickly converge toward valid
274 # chunks (otherwise, we might end up looking for start/end of many
274 # chunks (otherwise, we might end up looking for start/end of many
275 # revisions). This logic is not looking for the perfect slicing point, it
275 # revisions). This logic is not looking for the perfect slicing point, it
276 # focuses on quickly converging toward valid chunks.
276 # focuses on quickly converging toward valid chunks.
277 nbitem = len(revs)
277 nbitem = len(revs)
278 while (enddata - startdata) > targetsize:
278 while (enddata - startdata) > targetsize:
279 endrevidx = nbitem
279 endrevidx = nbitem
280 if nbitem - startrevidx <= 1:
280 if nbitem - startrevidx <= 1:
281 break # protect against individual chunk larger than limit
281 break # protect against individual chunk larger than limit
282 localenddata = revlog.end(revs[endrevidx - 1])
282 localenddata = revlog.end(revs[endrevidx - 1])
283 span = localenddata - startdata
283 span = localenddata - startdata
284 while span > targetsize:
284 while span > targetsize:
285 if endrevidx - startrevidx <= 1:
285 if endrevidx - startrevidx <= 1:
286 break # protect against individual chunk larger than limit
286 break # protect against individual chunk larger than limit
287 endrevidx -= (endrevidx - startrevidx) // 2
287 endrevidx -= (endrevidx - startrevidx) // 2
288 localenddata = revlog.end(revs[endrevidx - 1])
288 localenddata = revlog.end(revs[endrevidx - 1])
289 span = localenddata - startdata
289 span = localenddata - startdata
290 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
290 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
291 if chunk:
291 if chunk:
292 yield chunk
292 yield chunk
293 startrevidx = endrevidx
293 startrevidx = endrevidx
294 startdata = revlog.start(revs[startrevidx])
294 startdata = revlog.start(revs[startrevidx])
295
295
296 chunk = _trimchunk(revlog, revs, startrevidx)
296 chunk = _trimchunk(revlog, revs, startrevidx)
297 if chunk:
297 if chunk:
298 yield chunk
298 yield chunk
299
299
300 def _slicechunktodensity(revlog, revs, targetdensity=0.5,
300 def _slicechunktodensity(revlog, revs, targetdensity=0.5,
301 mingapsize=0):
301 mingapsize=0):
302 """slice revs to reduce the amount of unrelated data to be read from disk.
302 """slice revs to reduce the amount of unrelated data to be read from disk.
303
303
304 ``revs`` is sliced into groups that should be read in one time.
304 ``revs`` is sliced into groups that should be read in one time.
305 Assume that revs are sorted.
305 Assume that revs are sorted.
306
306
307 The initial chunk is sliced until the overall density (payload/chunks-span
307 The initial chunk is sliced until the overall density (payload/chunks-span
308 ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
308 ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
309 skipped.
309 skipped.
310
310
311 >>> revlog = _testrevlog([
311 >>> revlog = _testrevlog([
312 ... 5, #00 (5)
312 ... 5, #00 (5)
313 ... 10, #01 (5)
313 ... 10, #01 (5)
314 ... 12, #02 (2)
314 ... 12, #02 (2)
315 ... 12, #03 (empty)
315 ... 12, #03 (empty)
316 ... 27, #04 (15)
316 ... 27, #04 (15)
317 ... 31, #05 (4)
317 ... 31, #05 (4)
318 ... 31, #06 (empty)
318 ... 31, #06 (empty)
319 ... 42, #07 (11)
319 ... 42, #07 (11)
320 ... 47, #08 (5)
320 ... 47, #08 (5)
321 ... 47, #09 (empty)
321 ... 47, #09 (empty)
322 ... 48, #10 (1)
322 ... 48, #10 (1)
323 ... 51, #11 (3)
323 ... 51, #11 (3)
324 ... 74, #12 (23)
324 ... 74, #12 (23)
325 ... 85, #13 (11)
325 ... 85, #13 (11)
326 ... 86, #14 (1)
326 ... 86, #14 (1)
327 ... 91, #15 (5)
327 ... 91, #15 (5)
328 ... ])
328 ... ])
329
329
330 >>> list(_slicechunktodensity(revlog, list(range(16))))
330 >>> list(_slicechunktodensity(revlog, list(range(16))))
331 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
331 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
332 >>> list(_slicechunktodensity(revlog, [0, 15]))
332 >>> list(_slicechunktodensity(revlog, [0, 15]))
333 [[0], [15]]
333 [[0], [15]]
334 >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
334 >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
335 [[0], [11], [15]]
335 [[0], [11], [15]]
336 >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
336 >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
337 [[0], [11, 13, 15]]
337 [[0], [11, 13, 15]]
338 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
338 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
339 [[1, 2], [5, 8, 10, 11], [14]]
339 [[1, 2], [5, 8, 10, 11], [14]]
340 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
340 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
341 ... mingapsize=20))
341 ... mingapsize=20))
342 [[1, 2, 3, 5, 8, 10, 11], [14]]
342 [[1, 2, 3, 5, 8, 10, 11], [14]]
343 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
343 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
344 ... targetdensity=0.95))
344 ... targetdensity=0.95))
345 [[1, 2], [5], [8, 10, 11], [14]]
345 [[1, 2], [5], [8, 10, 11], [14]]
346 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
346 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
347 ... targetdensity=0.95, mingapsize=12))
347 ... targetdensity=0.95, mingapsize=12))
348 [[1, 2], [5, 8, 10, 11], [14]]
348 [[1, 2], [5, 8, 10, 11], [14]]
349 """
349 """
350 start = revlog.start
350 start = revlog.start
351 length = revlog.length
351 length = revlog.length
352
352
353 if len(revs) <= 1:
353 if len(revs) <= 1:
354 yield revs
354 yield revs
355 return
355 return
356
356
357 deltachainspan = segmentspan(revlog, revs)
357 deltachainspan = segmentspan(revlog, revs)
358
358
359 if deltachainspan < mingapsize:
359 if deltachainspan < mingapsize:
360 yield revs
360 yield revs
361 return
361 return
362
362
363 readdata = deltachainspan
363 readdata = deltachainspan
364 chainpayload = sum(length(r) for r in revs)
364 chainpayload = sum(length(r) for r in revs)
365
365
366 if deltachainspan:
366 if deltachainspan:
367 density = chainpayload / float(deltachainspan)
367 density = chainpayload / float(deltachainspan)
368 else:
368 else:
369 density = 1.0
369 density = 1.0
370
370
371 if density >= targetdensity:
371 if density >= targetdensity:
372 yield revs
372 yield revs
373 return
373 return
374
374
375 # Store the gaps in a heap to have them sorted by decreasing size
375 # Store the gaps in a heap to have them sorted by decreasing size
376 gaps = []
376 gaps = []
377 prevend = None
377 prevend = None
378 for i, rev in enumerate(revs):
378 for i, rev in enumerate(revs):
379 revstart = start(rev)
379 revstart = start(rev)
380 revlen = length(rev)
380 revlen = length(rev)
381
381
382 # Skip empty revisions to form larger holes
382 # Skip empty revisions to form larger holes
383 if revlen == 0:
383 if revlen == 0:
384 continue
384 continue
385
385
386 if prevend is not None:
386 if prevend is not None:
387 gapsize = revstart - prevend
387 gapsize = revstart - prevend
388 # only consider holes that are large enough
388 # only consider holes that are large enough
389 if gapsize > mingapsize:
389 if gapsize > mingapsize:
390 gaps.append((gapsize, i))
390 gaps.append((gapsize, i))
391
391
392 prevend = revstart + revlen
392 prevend = revstart + revlen
393 # sort the gaps to pop them from largest to small
393 # sort the gaps to pop them from largest to small
394 gaps.sort()
394 gaps.sort()
395
395
396 # Collect the indices of the largest holes until the density is acceptable
396 # Collect the indices of the largest holes until the density is acceptable
397 selected = []
397 selected = []
398 while gaps and density < targetdensity:
398 while gaps and density < targetdensity:
399 gapsize, gapidx = gaps.pop()
399 gapsize, gapidx = gaps.pop()
400
400
401 selected.append(gapidx)
401 selected.append(gapidx)
402
402
403 # the gap sizes are stored as negatives to be sorted decreasingly
403 # the gap sizes are stored as negatives to be sorted decreasingly
404 # by the heap
404 # by the heap
405 readdata -= gapsize
405 readdata -= gapsize
406 if readdata > 0:
406 if readdata > 0:
407 density = chainpayload / float(readdata)
407 density = chainpayload / float(readdata)
408 else:
408 else:
409 density = 1.0
409 density = 1.0
410 selected.sort()
410 selected.sort()
411
411
412 # Cut the revs at collected indices
412 # Cut the revs at collected indices
413 previdx = 0
413 previdx = 0
414 for idx in selected:
414 for idx in selected:
415
415
416 chunk = _trimchunk(revlog, revs, previdx, idx)
416 chunk = _trimchunk(revlog, revs, previdx, idx)
417 if chunk:
417 if chunk:
418 yield chunk
418 yield chunk
419
419
420 previdx = idx
420 previdx = idx
421
421
422 chunk = _trimchunk(revlog, revs, previdx)
422 chunk = _trimchunk(revlog, revs, previdx)
423 if chunk:
423 if chunk:
424 yield chunk
424 yield chunk
425
425
426 def _trimchunk(revlog, revs, startidx, endidx=None):
426 def _trimchunk(revlog, revs, startidx, endidx=None):
427 """returns revs[startidx:endidx] without empty trailing revs
427 """returns revs[startidx:endidx] without empty trailing revs
428
428
429 Doctest Setup
429 Doctest Setup
430 >>> revlog = _testrevlog([
430 >>> revlog = _testrevlog([
431 ... 5, #0
431 ... 5, #0
432 ... 10, #1
432 ... 10, #1
433 ... 12, #2
433 ... 12, #2
434 ... 12, #3 (empty)
434 ... 12, #3 (empty)
435 ... 17, #4
435 ... 17, #4
436 ... 21, #5
436 ... 21, #5
437 ... 21, #6 (empty)
437 ... 21, #6 (empty)
438 ... ])
438 ... ])
439
439
440 Contiguous cases:
440 Contiguous cases:
441 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
441 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
442 [0, 1, 2, 3, 4, 5]
442 [0, 1, 2, 3, 4, 5]
443 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
443 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
444 [0, 1, 2, 3, 4]
444 [0, 1, 2, 3, 4]
445 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
445 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
446 [0, 1, 2]
446 [0, 1, 2]
447 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
447 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
448 [2]
448 [2]
449 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
449 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
450 [3, 4, 5]
450 [3, 4, 5]
451 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
451 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
452 [3, 4]
452 [3, 4]
453
453
454 Discontiguous cases:
454 Discontiguous cases:
455 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
455 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
456 [1, 3, 5]
456 [1, 3, 5]
457 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
457 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
458 [1]
458 [1]
459 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
459 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
460 [3, 5]
460 [3, 5]
461 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
461 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
462 [3, 5]
462 [3, 5]
463 """
463 """
464 length = revlog.length
464 length = revlog.length
465
465
466 if endidx is None:
466 if endidx is None:
467 endidx = len(revs)
467 endidx = len(revs)
468
468
469 # If we have a non-emtpy delta candidate, there are nothing to trim
469 # If we have a non-emtpy delta candidate, there are nothing to trim
470 if revs[endidx - 1] < len(revlog):
470 if revs[endidx - 1] < len(revlog):
471 # Trim empty revs at the end, except the very first revision of a chain
471 # Trim empty revs at the end, except the very first revision of a chain
472 while (endidx > 1
472 while (endidx > 1
473 and endidx > startidx
473 and endidx > startidx
474 and length(revs[endidx - 1]) == 0):
474 and length(revs[endidx - 1]) == 0):
475 endidx -= 1
475 endidx -= 1
476
476
477 return revs[startidx:endidx]
477 return revs[startidx:endidx]
478
478
479 def segmentspan(revlog, revs):
479 def segmentspan(revlog, revs):
480 """Get the byte span of a segment of revisions
480 """Get the byte span of a segment of revisions
481
481
482 revs is a sorted array of revision numbers
482 revs is a sorted array of revision numbers
483
483
484 >>> revlog = _testrevlog([
484 >>> revlog = _testrevlog([
485 ... 5, #0
485 ... 5, #0
486 ... 10, #1
486 ... 10, #1
487 ... 12, #2
487 ... 12, #2
488 ... 12, #3 (empty)
488 ... 12, #3 (empty)
489 ... 17, #4
489 ... 17, #4
490 ... ])
490 ... ])
491
491
492 >>> segmentspan(revlog, [0, 1, 2, 3, 4])
492 >>> segmentspan(revlog, [0, 1, 2, 3, 4])
493 17
493 17
494 >>> segmentspan(revlog, [0, 4])
494 >>> segmentspan(revlog, [0, 4])
495 17
495 17
496 >>> segmentspan(revlog, [3, 4])
496 >>> segmentspan(revlog, [3, 4])
497 5
497 5
498 >>> segmentspan(revlog, [1, 2, 3,])
498 >>> segmentspan(revlog, [1, 2, 3,])
499 7
499 7
500 >>> segmentspan(revlog, [1, 3])
500 >>> segmentspan(revlog, [1, 3])
501 7
501 7
502 """
502 """
503 if not revs:
503 if not revs:
504 return 0
504 return 0
505 end = revlog.end(revs[-1])
505 end = revlog.end(revs[-1])
506 return end - revlog.start(revs[0])
506 return end - revlog.start(revs[0])
507
507
508 def _textfromdelta(fh, revlog, baserev, delta, p1, p2, flags, expectednode):
508 def _textfromdelta(fh, revlog, baserev, delta, p1, p2, flags, expectednode):
509 """build full text from a (base, delta) pair and other metadata"""
509 """build full text from a (base, delta) pair and other metadata"""
510 # special case deltas which replace entire base; no need to decode
510 # special case deltas which replace entire base; no need to decode
511 # base revision. this neatly avoids censored bases, which throw when
511 # base revision. this neatly avoids censored bases, which throw when
512 # they're decoded.
512 # they're decoded.
513 hlen = struct.calcsize(">lll")
513 hlen = struct.calcsize(">lll")
514 if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
514 if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
515 len(delta) - hlen):
515 len(delta) - hlen):
516 fulltext = delta[hlen:]
516 fulltext = delta[hlen:]
517 else:
517 else:
518 # deltabase is rawtext before changed by flag processors, which is
518 # deltabase is rawtext before changed by flag processors, which is
519 # equivalent to non-raw text
519 # equivalent to non-raw text
520 basetext = revlog.revision(baserev, _df=fh, raw=False)
520 basetext = revlog.revision(baserev, _df=fh, raw=False)
521 fulltext = mdiff.patch(basetext, delta)
521 fulltext = mdiff.patch(basetext, delta)
522
522
523 try:
523 try:
524 res = revlog._processflags(fulltext, flags, 'read', raw=True)
524 validatehash = revlog._processflagsraw(fulltext, flags)
525 fulltext, validatehash = res
526 if validatehash:
525 if validatehash:
527 revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
526 revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
528 if flags & REVIDX_ISCENSORED:
527 if flags & REVIDX_ISCENSORED:
529 raise error.StorageError(_('node %s is not censored') %
528 raise error.StorageError(_('node %s is not censored') %
530 expectednode)
529 expectednode)
531 except error.CensoredNodeError:
530 except error.CensoredNodeError:
532 # must pass the censored index flag to add censored revisions
531 # must pass the censored index flag to add censored revisions
533 if not flags & REVIDX_ISCENSORED:
532 if not flags & REVIDX_ISCENSORED:
534 raise
533 raise
535 return fulltext
534 return fulltext
536
535
537 @attr.s(slots=True, frozen=True)
536 @attr.s(slots=True, frozen=True)
538 class _deltainfo(object):
537 class _deltainfo(object):
539 distance = attr.ib()
538 distance = attr.ib()
540 deltalen = attr.ib()
539 deltalen = attr.ib()
541 data = attr.ib()
540 data = attr.ib()
542 base = attr.ib()
541 base = attr.ib()
543 chainbase = attr.ib()
542 chainbase = attr.ib()
544 chainlen = attr.ib()
543 chainlen = attr.ib()
545 compresseddeltalen = attr.ib()
544 compresseddeltalen = attr.ib()
546 snapshotdepth = attr.ib()
545 snapshotdepth = attr.ib()
547
546
548 def isgooddeltainfo(revlog, deltainfo, revinfo):
547 def isgooddeltainfo(revlog, deltainfo, revinfo):
549 """Returns True if the given delta is good. Good means that it is within
548 """Returns True if the given delta is good. Good means that it is within
550 the disk span, disk size, and chain length bounds that we know to be
549 the disk span, disk size, and chain length bounds that we know to be
551 performant."""
550 performant."""
552 if deltainfo is None:
551 if deltainfo is None:
553 return False
552 return False
554
553
555 # - 'deltainfo.distance' is the distance from the base revision --
554 # - 'deltainfo.distance' is the distance from the base revision --
556 # bounding it limits the amount of I/O we need to do.
555 # bounding it limits the amount of I/O we need to do.
557 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
556 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
558 # deltas we need to apply -- bounding it limits the amount of CPU
557 # deltas we need to apply -- bounding it limits the amount of CPU
559 # we consume.
558 # we consume.
560
559
561 textlen = revinfo.textlen
560 textlen = revinfo.textlen
562 defaultmax = textlen * 4
561 defaultmax = textlen * 4
563 maxdist = revlog._maxdeltachainspan
562 maxdist = revlog._maxdeltachainspan
564 if not maxdist:
563 if not maxdist:
565 maxdist = deltainfo.distance # ensure the conditional pass
564 maxdist = deltainfo.distance # ensure the conditional pass
566 maxdist = max(maxdist, defaultmax)
565 maxdist = max(maxdist, defaultmax)
567
566
568 # Bad delta from read span:
567 # Bad delta from read span:
569 #
568 #
570 # If the span of data read is larger than the maximum allowed.
569 # If the span of data read is larger than the maximum allowed.
571 #
570 #
572 # In the sparse-revlog case, we rely on the associated "sparse reading"
571 # In the sparse-revlog case, we rely on the associated "sparse reading"
573 # to avoid issue related to the span of data. In theory, it would be
572 # to avoid issue related to the span of data. In theory, it would be
574 # possible to build pathological revlog where delta pattern would lead
573 # possible to build pathological revlog where delta pattern would lead
575 # to too many reads. However, they do not happen in practice at all. So
574 # to too many reads. However, they do not happen in practice at all. So
576 # we skip the span check entirely.
575 # we skip the span check entirely.
577 if not revlog._sparserevlog and maxdist < deltainfo.distance:
576 if not revlog._sparserevlog and maxdist < deltainfo.distance:
578 return False
577 return False
579
578
580 # Bad delta from new delta size:
579 # Bad delta from new delta size:
581 #
580 #
582 # If the delta size is larger than the target text, storing the
581 # If the delta size is larger than the target text, storing the
583 # delta will be inefficient.
582 # delta will be inefficient.
584 if textlen < deltainfo.deltalen:
583 if textlen < deltainfo.deltalen:
585 return False
584 return False
586
585
587 # Bad delta from cumulated payload size:
586 # Bad delta from cumulated payload size:
588 #
587 #
589 # If the sum of delta get larger than K * target text length.
588 # If the sum of delta get larger than K * target text length.
590 if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
589 if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
591 return False
590 return False
592
591
593 # Bad delta from chain length:
592 # Bad delta from chain length:
594 #
593 #
595 # If the number of delta in the chain gets too high.
594 # If the number of delta in the chain gets too high.
596 if (revlog._maxchainlen
595 if (revlog._maxchainlen
597 and revlog._maxchainlen < deltainfo.chainlen):
596 and revlog._maxchainlen < deltainfo.chainlen):
598 return False
597 return False
599
598
600 # bad delta from intermediate snapshot size limit
599 # bad delta from intermediate snapshot size limit
601 #
600 #
602 # If an intermediate snapshot size is higher than the limit. The
601 # If an intermediate snapshot size is higher than the limit. The
603 # limit exist to prevent endless chain of intermediate delta to be
602 # limit exist to prevent endless chain of intermediate delta to be
604 # created.
603 # created.
605 if (deltainfo.snapshotdepth is not None and
604 if (deltainfo.snapshotdepth is not None and
606 (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen):
605 (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen):
607 return False
606 return False
608
607
609 # bad delta if new intermediate snapshot is larger than the previous
608 # bad delta if new intermediate snapshot is larger than the previous
610 # snapshot
609 # snapshot
611 if (deltainfo.snapshotdepth
610 if (deltainfo.snapshotdepth
612 and revlog.length(deltainfo.base) < deltainfo.deltalen):
611 and revlog.length(deltainfo.base) < deltainfo.deltalen):
613 return False
612 return False
614
613
615 return True
614 return True
616
615
617 # If a revision's full text is that much bigger than a base candidate full
616 # If a revision's full text is that much bigger than a base candidate full
618 # text's, it is very unlikely that it will produce a valid delta. We no longer
617 # text's, it is very unlikely that it will produce a valid delta. We no longer
619 # consider these candidates.
618 # consider these candidates.
620 LIMIT_BASE2TEXT = 500
619 LIMIT_BASE2TEXT = 500
621
620
622 def _candidategroups(revlog, textlen, p1, p2, cachedelta):
621 def _candidategroups(revlog, textlen, p1, p2, cachedelta):
623 """Provides group of revision to be tested as delta base
622 """Provides group of revision to be tested as delta base
624
623
625 This top level function focus on emitting groups with unique and worthwhile
624 This top level function focus on emitting groups with unique and worthwhile
626 content. See _raw_candidate_groups for details about the group order.
625 content. See _raw_candidate_groups for details about the group order.
627 """
626 """
628 # should we try to build a delta?
627 # should we try to build a delta?
629 if not (len(revlog) and revlog._storedeltachains):
628 if not (len(revlog) and revlog._storedeltachains):
630 yield None
629 yield None
631 return
630 return
632
631
633 deltalength = revlog.length
632 deltalength = revlog.length
634 deltaparent = revlog.deltaparent
633 deltaparent = revlog.deltaparent
635 sparse = revlog._sparserevlog
634 sparse = revlog._sparserevlog
636 good = None
635 good = None
637
636
638 deltas_limit = textlen * LIMIT_DELTA2TEXT
637 deltas_limit = textlen * LIMIT_DELTA2TEXT
639
638
640 tested = {nullrev}
639 tested = {nullrev}
641 candidates = _refinedgroups(revlog, p1, p2, cachedelta)
640 candidates = _refinedgroups(revlog, p1, p2, cachedelta)
642 while True:
641 while True:
643 temptative = candidates.send(good)
642 temptative = candidates.send(good)
644 if temptative is None:
643 if temptative is None:
645 break
644 break
646 group = []
645 group = []
647 for rev in temptative:
646 for rev in temptative:
648 # skip over empty delta (no need to include them in a chain)
647 # skip over empty delta (no need to include them in a chain)
649 while (revlog._generaldelta
648 while (revlog._generaldelta
650 and not (rev == nullrev
649 and not (rev == nullrev
651 or rev in tested
650 or rev in tested
652 or deltalength(rev))):
651 or deltalength(rev))):
653 tested.add(rev)
652 tested.add(rev)
654 rev = deltaparent(rev)
653 rev = deltaparent(rev)
655 # no need to try a delta against nullrev, this will be done as a
654 # no need to try a delta against nullrev, this will be done as a
656 # last resort.
655 # last resort.
657 if rev == nullrev:
656 if rev == nullrev:
658 continue
657 continue
659 # filter out revision we tested already
658 # filter out revision we tested already
660 if rev in tested:
659 if rev in tested:
661 continue
660 continue
662 tested.add(rev)
661 tested.add(rev)
663 # filter out delta base that will never produce good delta
662 # filter out delta base that will never produce good delta
664 if deltas_limit < revlog.length(rev):
663 if deltas_limit < revlog.length(rev):
665 continue
664 continue
666 if sparse and revlog.rawsize(rev) < (textlen // LIMIT_BASE2TEXT):
665 if sparse and revlog.rawsize(rev) < (textlen // LIMIT_BASE2TEXT):
667 continue
666 continue
668 # no delta for rawtext-changing revs (see "candelta" for why)
667 # no delta for rawtext-changing revs (see "candelta" for why)
669 if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
668 if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
670 continue
669 continue
671 # If we reach here, we are about to build and test a delta.
670 # If we reach here, we are about to build and test a delta.
672 # The delta building process will compute the chaininfo in all
671 # The delta building process will compute the chaininfo in all
673 # case, since that computation is cached, it is fine to access it
672 # case, since that computation is cached, it is fine to access it
674 # here too.
673 # here too.
675 chainlen, chainsize = revlog._chaininfo(rev)
674 chainlen, chainsize = revlog._chaininfo(rev)
676 # if chain will be too long, skip base
675 # if chain will be too long, skip base
677 if revlog._maxchainlen and chainlen >= revlog._maxchainlen:
676 if revlog._maxchainlen and chainlen >= revlog._maxchainlen:
678 continue
677 continue
679 # if chain already have too much data, skip base
678 # if chain already have too much data, skip base
680 if deltas_limit < chainsize:
679 if deltas_limit < chainsize:
681 continue
680 continue
682 if sparse and revlog.upperboundcomp is not None:
681 if sparse and revlog.upperboundcomp is not None:
683 maxcomp = revlog.upperboundcomp
682 maxcomp = revlog.upperboundcomp
684 basenotsnap = (p1, p2, nullrev)
683 basenotsnap = (p1, p2, nullrev)
685 if rev not in basenotsnap and revlog.issnapshot(rev):
684 if rev not in basenotsnap and revlog.issnapshot(rev):
686 snapshotdepth = revlog.snapshotdepth(rev)
685 snapshotdepth = revlog.snapshotdepth(rev)
687 # If text is significantly larger than the base, we can
686 # If text is significantly larger than the base, we can
688 # expect the resulting delta to be proportional to the size
687 # expect the resulting delta to be proportional to the size
689 # difference
688 # difference
690 revsize = revlog.rawsize(rev)
689 revsize = revlog.rawsize(rev)
691 rawsizedistance = max(textlen - revsize, 0)
690 rawsizedistance = max(textlen - revsize, 0)
692 # use an estimate of the compression upper bound.
691 # use an estimate of the compression upper bound.
693 lowestrealisticdeltalen = rawsizedistance // maxcomp
692 lowestrealisticdeltalen = rawsizedistance // maxcomp
694
693
695 # check the absolute constraint on the delta size
694 # check the absolute constraint on the delta size
696 snapshotlimit = textlen >> snapshotdepth
695 snapshotlimit = textlen >> snapshotdepth
697 if snapshotlimit < lowestrealisticdeltalen:
696 if snapshotlimit < lowestrealisticdeltalen:
698 # delta lower bound is larger than accepted upper bound
697 # delta lower bound is larger than accepted upper bound
699 continue
698 continue
700
699
701 # check the relative constraint on the delta size
700 # check the relative constraint on the delta size
702 revlength = revlog.length(rev)
701 revlength = revlog.length(rev)
703 if revlength < lowestrealisticdeltalen:
702 if revlength < lowestrealisticdeltalen:
704 # delta probable lower bound is larger than target base
703 # delta probable lower bound is larger than target base
705 continue
704 continue
706
705
707 group.append(rev)
706 group.append(rev)
708 if group:
707 if group:
709 # XXX: in the sparse revlog case, group can become large,
708 # XXX: in the sparse revlog case, group can become large,
710 # impacting performances. Some bounding or slicing mecanism
709 # impacting performances. Some bounding or slicing mecanism
711 # would help to reduce this impact.
710 # would help to reduce this impact.
712 good = yield tuple(group)
711 good = yield tuple(group)
713 yield None
712 yield None
714
713
715 def _findsnapshots(revlog, cache, start_rev):
714 def _findsnapshots(revlog, cache, start_rev):
716 """find snapshot from start_rev to tip"""
715 """find snapshot from start_rev to tip"""
717 if util.safehasattr(revlog.index, 'findsnapshots'):
716 if util.safehasattr(revlog.index, 'findsnapshots'):
718 revlog.index.findsnapshots(cache, start_rev)
717 revlog.index.findsnapshots(cache, start_rev)
719 else:
718 else:
720 deltaparent = revlog.deltaparent
719 deltaparent = revlog.deltaparent
721 issnapshot = revlog.issnapshot
720 issnapshot = revlog.issnapshot
722 for rev in revlog.revs(start_rev):
721 for rev in revlog.revs(start_rev):
723 if issnapshot(rev):
722 if issnapshot(rev):
724 cache[deltaparent(rev)].append(rev)
723 cache[deltaparent(rev)].append(rev)
725
724
726 def _refinedgroups(revlog, p1, p2, cachedelta):
725 def _refinedgroups(revlog, p1, p2, cachedelta):
727 good = None
726 good = None
728 # First we try to reuse a the delta contained in the bundle.
727 # First we try to reuse a the delta contained in the bundle.
729 # (or from the source revlog)
728 # (or from the source revlog)
730 #
729 #
731 # This logic only applies to general delta repositories and can be disabled
730 # This logic only applies to general delta repositories and can be disabled
732 # through configuration. Disabling reuse source delta is useful when
731 # through configuration. Disabling reuse source delta is useful when
733 # we want to make sure we recomputed "optimal" deltas.
732 # we want to make sure we recomputed "optimal" deltas.
734 if cachedelta and revlog._generaldelta and revlog._lazydeltabase:
733 if cachedelta and revlog._generaldelta and revlog._lazydeltabase:
735 # Assume what we received from the server is a good choice
734 # Assume what we received from the server is a good choice
736 # build delta will reuse the cache
735 # build delta will reuse the cache
737 good = yield (cachedelta[0],)
736 good = yield (cachedelta[0],)
738 if good is not None:
737 if good is not None:
739 yield None
738 yield None
740 return
739 return
741 snapshots = collections.defaultdict(list)
740 snapshots = collections.defaultdict(list)
742 for candidates in _rawgroups(revlog, p1, p2, cachedelta, snapshots):
741 for candidates in _rawgroups(revlog, p1, p2, cachedelta, snapshots):
743 good = yield candidates
742 good = yield candidates
744 if good is not None:
743 if good is not None:
745 break
744 break
746
745
747 # If sparse revlog is enabled, we can try to refine the available deltas
746 # If sparse revlog is enabled, we can try to refine the available deltas
748 if not revlog._sparserevlog:
747 if not revlog._sparserevlog:
749 yield None
748 yield None
750 return
749 return
751
750
752 # if we have a refinable value, try to refine it
751 # if we have a refinable value, try to refine it
753 if good is not None and good not in (p1, p2) and revlog.issnapshot(good):
752 if good is not None and good not in (p1, p2) and revlog.issnapshot(good):
754 # refine snapshot down
753 # refine snapshot down
755 previous = None
754 previous = None
756 while previous != good:
755 while previous != good:
757 previous = good
756 previous = good
758 base = revlog.deltaparent(good)
757 base = revlog.deltaparent(good)
759 if base == nullrev:
758 if base == nullrev:
760 break
759 break
761 good = yield (base,)
760 good = yield (base,)
762 # refine snapshot up
761 # refine snapshot up
763 if not snapshots:
762 if not snapshots:
764 _findsnapshots(revlog, snapshots, good + 1)
763 _findsnapshots(revlog, snapshots, good + 1)
765 previous = None
764 previous = None
766 while good != previous:
765 while good != previous:
767 previous = good
766 previous = good
768 children = tuple(sorted(c for c in snapshots[good]))
767 children = tuple(sorted(c for c in snapshots[good]))
769 good = yield children
768 good = yield children
770
769
771 # we have found nothing
770 # we have found nothing
772 yield None
771 yield None
773
772
774 def _rawgroups(revlog, p1, p2, cachedelta, snapshots=None):
773 def _rawgroups(revlog, p1, p2, cachedelta, snapshots=None):
775 """Provides group of revision to be tested as delta base
774 """Provides group of revision to be tested as delta base
776
775
777 This lower level function focus on emitting delta theorically interresting
776 This lower level function focus on emitting delta theorically interresting
778 without looking it any practical details.
777 without looking it any practical details.
779
778
780 The group order aims at providing fast or small candidates first.
779 The group order aims at providing fast or small candidates first.
781 """
780 """
782 gdelta = revlog._generaldelta
781 gdelta = revlog._generaldelta
783 # gate sparse behind general-delta because of issue6056
782 # gate sparse behind general-delta because of issue6056
784 sparse = gdelta and revlog._sparserevlog
783 sparse = gdelta and revlog._sparserevlog
785 curr = len(revlog)
784 curr = len(revlog)
786 prev = curr - 1
785 prev = curr - 1
787 deltachain = lambda rev: revlog._deltachain(rev)[0]
786 deltachain = lambda rev: revlog._deltachain(rev)[0]
788
787
789 if gdelta:
788 if gdelta:
790 # exclude already lazy tested base if any
789 # exclude already lazy tested base if any
791 parents = [p for p in (p1, p2) if p != nullrev]
790 parents = [p for p in (p1, p2) if p != nullrev]
792
791
793 if not revlog._deltabothparents and len(parents) == 2:
792 if not revlog._deltabothparents and len(parents) == 2:
794 parents.sort()
793 parents.sort()
795 # To minimize the chance of having to build a fulltext,
794 # To minimize the chance of having to build a fulltext,
796 # pick first whichever parent is closest to us (max rev)
795 # pick first whichever parent is closest to us (max rev)
797 yield (parents[1],)
796 yield (parents[1],)
798 # then the other one (min rev) if the first did not fit
797 # then the other one (min rev) if the first did not fit
799 yield (parents[0],)
798 yield (parents[0],)
800 elif len(parents) > 0:
799 elif len(parents) > 0:
801 # Test all parents (1 or 2), and keep the best candidate
800 # Test all parents (1 or 2), and keep the best candidate
802 yield parents
801 yield parents
803
802
804 if sparse and parents:
803 if sparse and parents:
805 if snapshots is None:
804 if snapshots is None:
806 # map: base-rev: snapshot-rev
805 # map: base-rev: snapshot-rev
807 snapshots = collections.defaultdict(list)
806 snapshots = collections.defaultdict(list)
808 # See if we can use an existing snapshot in the parent chains to use as
807 # See if we can use an existing snapshot in the parent chains to use as
809 # a base for a new intermediate-snapshot
808 # a base for a new intermediate-snapshot
810 #
809 #
811 # search for snapshot in parents delta chain
810 # search for snapshot in parents delta chain
812 # map: snapshot-level: snapshot-rev
811 # map: snapshot-level: snapshot-rev
813 parents_snaps = collections.defaultdict(set)
812 parents_snaps = collections.defaultdict(set)
814 candidate_chains = [deltachain(p) for p in parents]
813 candidate_chains = [deltachain(p) for p in parents]
815 for chain in candidate_chains:
814 for chain in candidate_chains:
816 for idx, s in enumerate(chain):
815 for idx, s in enumerate(chain):
817 if not revlog.issnapshot(s):
816 if not revlog.issnapshot(s):
818 break
817 break
819 parents_snaps[idx].add(s)
818 parents_snaps[idx].add(s)
820 snapfloor = min(parents_snaps[0]) + 1
819 snapfloor = min(parents_snaps[0]) + 1
821 _findsnapshots(revlog, snapshots, snapfloor)
820 _findsnapshots(revlog, snapshots, snapfloor)
822 # search for the highest "unrelated" revision
821 # search for the highest "unrelated" revision
823 #
822 #
824 # Adding snapshots used by "unrelated" revision increase the odd we
823 # Adding snapshots used by "unrelated" revision increase the odd we
825 # reuse an independant, yet better snapshot chain.
824 # reuse an independant, yet better snapshot chain.
826 #
825 #
827 # XXX instead of building a set of revisions, we could lazily enumerate
826 # XXX instead of building a set of revisions, we could lazily enumerate
828 # over the chains. That would be more efficient, however we stick to
827 # over the chains. That would be more efficient, however we stick to
829 # simple code for now.
828 # simple code for now.
830 all_revs = set()
829 all_revs = set()
831 for chain in candidate_chains:
830 for chain in candidate_chains:
832 all_revs.update(chain)
831 all_revs.update(chain)
833 other = None
832 other = None
834 for r in revlog.revs(prev, snapfloor):
833 for r in revlog.revs(prev, snapfloor):
835 if r not in all_revs:
834 if r not in all_revs:
836 other = r
835 other = r
837 break
836 break
838 if other is not None:
837 if other is not None:
839 # To avoid unfair competition, we won't use unrelated intermediate
838 # To avoid unfair competition, we won't use unrelated intermediate
840 # snapshot that are deeper than the ones from the parent delta
839 # snapshot that are deeper than the ones from the parent delta
841 # chain.
840 # chain.
842 max_depth = max(parents_snaps.keys())
841 max_depth = max(parents_snaps.keys())
843 chain = deltachain(other)
842 chain = deltachain(other)
844 for idx, s in enumerate(chain):
843 for idx, s in enumerate(chain):
845 if s < snapfloor:
844 if s < snapfloor:
846 continue
845 continue
847 if max_depth < idx:
846 if max_depth < idx:
848 break
847 break
849 if not revlog.issnapshot(s):
848 if not revlog.issnapshot(s):
850 break
849 break
851 parents_snaps[idx].add(s)
850 parents_snaps[idx].add(s)
852 # Test them as possible intermediate snapshot base
851 # Test them as possible intermediate snapshot base
853 # We test them from highest to lowest level. High level one are more
852 # We test them from highest to lowest level. High level one are more
854 # likely to result in small delta
853 # likely to result in small delta
855 floor = None
854 floor = None
856 for idx, snaps in sorted(parents_snaps.items(), reverse=True):
855 for idx, snaps in sorted(parents_snaps.items(), reverse=True):
857 siblings = set()
856 siblings = set()
858 for s in snaps:
857 for s in snaps:
859 siblings.update(snapshots[s])
858 siblings.update(snapshots[s])
860 # Before considering making a new intermediate snapshot, we check
859 # Before considering making a new intermediate snapshot, we check
861 # if an existing snapshot, children of base we consider, would be
860 # if an existing snapshot, children of base we consider, would be
862 # suitable.
861 # suitable.
863 #
862 #
864 # It give a change to reuse a delta chain "unrelated" to the
863 # It give a change to reuse a delta chain "unrelated" to the
865 # current revision instead of starting our own. Without such
864 # current revision instead of starting our own. Without such
866 # re-use, topological branches would keep reopening new chains.
865 # re-use, topological branches would keep reopening new chains.
867 # Creating more and more snapshot as the repository grow.
866 # Creating more and more snapshot as the repository grow.
868
867
869 if floor is not None:
868 if floor is not None:
870 # We only do this for siblings created after the one in our
869 # We only do this for siblings created after the one in our
871 # parent's delta chain. Those created before has less chances
870 # parent's delta chain. Those created before has less chances
872 # to be valid base since our ancestors had to create a new
871 # to be valid base since our ancestors had to create a new
873 # snapshot.
872 # snapshot.
874 siblings = [r for r in siblings if floor < r]
873 siblings = [r for r in siblings if floor < r]
875 yield tuple(sorted(siblings))
874 yield tuple(sorted(siblings))
876 # then test the base from our parent's delta chain.
875 # then test the base from our parent's delta chain.
877 yield tuple(sorted(snaps))
876 yield tuple(sorted(snaps))
878 floor = min(snaps)
877 floor = min(snaps)
879 # No suitable base found in the parent chain, search if any full
878 # No suitable base found in the parent chain, search if any full
880 # snapshots emitted since parent's base would be a suitable base for an
879 # snapshots emitted since parent's base would be a suitable base for an
881 # intermediate snapshot.
880 # intermediate snapshot.
882 #
881 #
883 # It give a chance to reuse a delta chain unrelated to the current
882 # It give a chance to reuse a delta chain unrelated to the current
884 # revisions instead of starting our own. Without such re-use,
883 # revisions instead of starting our own. Without such re-use,
885 # topological branches would keep reopening new full chains. Creating
884 # topological branches would keep reopening new full chains. Creating
886 # more and more snapshot as the repository grow.
885 # more and more snapshot as the repository grow.
887 yield tuple(snapshots[nullrev])
886 yield tuple(snapshots[nullrev])
888
887
889 if not sparse:
888 if not sparse:
890 # other approach failed try against prev to hopefully save us a
889 # other approach failed try against prev to hopefully save us a
891 # fulltext.
890 # fulltext.
892 yield (prev,)
891 yield (prev,)
893
892
894 class deltacomputer(object):
893 class deltacomputer(object):
895 def __init__(self, revlog):
894 def __init__(self, revlog):
896 self.revlog = revlog
895 self.revlog = revlog
897
896
898 def buildtext(self, revinfo, fh):
897 def buildtext(self, revinfo, fh):
899 """Builds a fulltext version of a revision
898 """Builds a fulltext version of a revision
900
899
901 revinfo: _revisioninfo instance that contains all needed info
900 revinfo: _revisioninfo instance that contains all needed info
902 fh: file handle to either the .i or the .d revlog file,
901 fh: file handle to either the .i or the .d revlog file,
903 depending on whether it is inlined or not
902 depending on whether it is inlined or not
904 """
903 """
905 btext = revinfo.btext
904 btext = revinfo.btext
906 if btext[0] is not None:
905 if btext[0] is not None:
907 return btext[0]
906 return btext[0]
908
907
909 revlog = self.revlog
908 revlog = self.revlog
910 cachedelta = revinfo.cachedelta
909 cachedelta = revinfo.cachedelta
911 baserev = cachedelta[0]
910 baserev = cachedelta[0]
912 delta = cachedelta[1]
911 delta = cachedelta[1]
913
912
914 fulltext = btext[0] = _textfromdelta(fh, revlog, baserev, delta,
913 fulltext = btext[0] = _textfromdelta(fh, revlog, baserev, delta,
915 revinfo.p1, revinfo.p2,
914 revinfo.p1, revinfo.p2,
916 revinfo.flags, revinfo.node)
915 revinfo.flags, revinfo.node)
917 return fulltext
916 return fulltext
918
917
919 def _builddeltadiff(self, base, revinfo, fh):
918 def _builddeltadiff(self, base, revinfo, fh):
920 revlog = self.revlog
919 revlog = self.revlog
921 t = self.buildtext(revinfo, fh)
920 t = self.buildtext(revinfo, fh)
922 if revlog.iscensored(base):
921 if revlog.iscensored(base):
923 # deltas based on a censored revision must replace the
922 # deltas based on a censored revision must replace the
924 # full content in one patch, so delta works everywhere
923 # full content in one patch, so delta works everywhere
925 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
924 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
926 delta = header + t
925 delta = header + t
927 else:
926 else:
928 ptext = revlog.rawdata(base, _df=fh)
927 ptext = revlog.rawdata(base, _df=fh)
929 delta = mdiff.textdiff(ptext, t)
928 delta = mdiff.textdiff(ptext, t)
930
929
931 return delta
930 return delta
932
931
933 def _builddeltainfo(self, revinfo, base, fh):
932 def _builddeltainfo(self, revinfo, base, fh):
934 # can we use the cached delta?
933 # can we use the cached delta?
935 revlog = self.revlog
934 revlog = self.revlog
936 chainbase = revlog.chainbase(base)
935 chainbase = revlog.chainbase(base)
937 if revlog._generaldelta:
936 if revlog._generaldelta:
938 deltabase = base
937 deltabase = base
939 else:
938 else:
940 deltabase = chainbase
939 deltabase = chainbase
941 snapshotdepth = None
940 snapshotdepth = None
942 if revlog._sparserevlog and deltabase == nullrev:
941 if revlog._sparserevlog and deltabase == nullrev:
943 snapshotdepth = 0
942 snapshotdepth = 0
944 elif revlog._sparserevlog and revlog.issnapshot(deltabase):
943 elif revlog._sparserevlog and revlog.issnapshot(deltabase):
945 # A delta chain should always be one full snapshot,
944 # A delta chain should always be one full snapshot,
946 # zero or more semi-snapshots, and zero or more deltas
945 # zero or more semi-snapshots, and zero or more deltas
947 p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
946 p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
948 if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
947 if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
949 snapshotdepth = len(revlog._deltachain(deltabase)[0])
948 snapshotdepth = len(revlog._deltachain(deltabase)[0])
950 delta = None
949 delta = None
951 if revinfo.cachedelta:
950 if revinfo.cachedelta:
952 cachebase, cachediff = revinfo.cachedelta
951 cachebase, cachediff = revinfo.cachedelta
953 #check if the diff still apply
952 #check if the diff still apply
954 currentbase = cachebase
953 currentbase = cachebase
955 while (currentbase != nullrev
954 while (currentbase != nullrev
956 and currentbase != base
955 and currentbase != base
957 and self.revlog.length(currentbase) == 0):
956 and self.revlog.length(currentbase) == 0):
958 currentbase = self.revlog.deltaparent(currentbase)
957 currentbase = self.revlog.deltaparent(currentbase)
959 if self.revlog._lazydelta and currentbase == base:
958 if self.revlog._lazydelta and currentbase == base:
960 delta = revinfo.cachedelta[1]
959 delta = revinfo.cachedelta[1]
961 if delta is None:
960 if delta is None:
962 delta = self._builddeltadiff(base, revinfo, fh)
961 delta = self._builddeltadiff(base, revinfo, fh)
963 # snapshotdept need to be neither None nor 0 level snapshot
962 # snapshotdept need to be neither None nor 0 level snapshot
964 if revlog.upperboundcomp is not None and snapshotdepth:
963 if revlog.upperboundcomp is not None and snapshotdepth:
965 lowestrealisticdeltalen = len(delta) // revlog.upperboundcomp
964 lowestrealisticdeltalen = len(delta) // revlog.upperboundcomp
966 snapshotlimit = revinfo.textlen >> snapshotdepth
965 snapshotlimit = revinfo.textlen >> snapshotdepth
967 if snapshotlimit < lowestrealisticdeltalen:
966 if snapshotlimit < lowestrealisticdeltalen:
968 return None
967 return None
969 if revlog.length(base) < lowestrealisticdeltalen:
968 if revlog.length(base) < lowestrealisticdeltalen:
970 return None
969 return None
971 header, data = revlog.compress(delta)
970 header, data = revlog.compress(delta)
972 deltalen = len(header) + len(data)
971 deltalen = len(header) + len(data)
973 offset = revlog.end(len(revlog) - 1)
972 offset = revlog.end(len(revlog) - 1)
974 dist = deltalen + offset - revlog.start(chainbase)
973 dist = deltalen + offset - revlog.start(chainbase)
975 chainlen, compresseddeltalen = revlog._chaininfo(base)
974 chainlen, compresseddeltalen = revlog._chaininfo(base)
976 chainlen += 1
975 chainlen += 1
977 compresseddeltalen += deltalen
976 compresseddeltalen += deltalen
978
977
979 return _deltainfo(dist, deltalen, (header, data), deltabase,
978 return _deltainfo(dist, deltalen, (header, data), deltabase,
980 chainbase, chainlen, compresseddeltalen,
979 chainbase, chainlen, compresseddeltalen,
981 snapshotdepth)
980 snapshotdepth)
982
981
983 def _fullsnapshotinfo(self, fh, revinfo):
982 def _fullsnapshotinfo(self, fh, revinfo):
984 curr = len(self.revlog)
983 curr = len(self.revlog)
985 rawtext = self.buildtext(revinfo, fh)
984 rawtext = self.buildtext(revinfo, fh)
986 data = self.revlog.compress(rawtext)
985 data = self.revlog.compress(rawtext)
987 compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
986 compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
988 deltabase = chainbase = curr
987 deltabase = chainbase = curr
989 snapshotdepth = 0
988 snapshotdepth = 0
990 chainlen = 1
989 chainlen = 1
991
990
992 return _deltainfo(dist, deltalen, data, deltabase,
991 return _deltainfo(dist, deltalen, data, deltabase,
993 chainbase, chainlen, compresseddeltalen,
992 chainbase, chainlen, compresseddeltalen,
994 snapshotdepth)
993 snapshotdepth)
995
994
996 def finddeltainfo(self, revinfo, fh):
995 def finddeltainfo(self, revinfo, fh):
997 """Find an acceptable delta against a candidate revision
996 """Find an acceptable delta against a candidate revision
998
997
999 revinfo: information about the revision (instance of _revisioninfo)
998 revinfo: information about the revision (instance of _revisioninfo)
1000 fh: file handle to either the .i or the .d revlog file,
999 fh: file handle to either the .i or the .d revlog file,
1001 depending on whether it is inlined or not
1000 depending on whether it is inlined or not
1002
1001
1003 Returns the first acceptable candidate revision, as ordered by
1002 Returns the first acceptable candidate revision, as ordered by
1004 _candidategroups
1003 _candidategroups
1005
1004
1006 If no suitable deltabase is found, we return delta info for a full
1005 If no suitable deltabase is found, we return delta info for a full
1007 snapshot.
1006 snapshot.
1008 """
1007 """
1009 if not revinfo.textlen:
1008 if not revinfo.textlen:
1010 return self._fullsnapshotinfo(fh, revinfo)
1009 return self._fullsnapshotinfo(fh, revinfo)
1011
1010
1012 # no delta for flag processor revision (see "candelta" for why)
1011 # no delta for flag processor revision (see "candelta" for why)
1013 # not calling candelta since only one revision needs test, also to
1012 # not calling candelta since only one revision needs test, also to
1014 # avoid overhead fetching flags again.
1013 # avoid overhead fetching flags again.
1015 if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
1014 if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
1016 return self._fullsnapshotinfo(fh, revinfo)
1015 return self._fullsnapshotinfo(fh, revinfo)
1017
1016
1018 cachedelta = revinfo.cachedelta
1017 cachedelta = revinfo.cachedelta
1019 p1 = revinfo.p1
1018 p1 = revinfo.p1
1020 p2 = revinfo.p2
1019 p2 = revinfo.p2
1021 revlog = self.revlog
1020 revlog = self.revlog
1022
1021
1023 deltainfo = None
1022 deltainfo = None
1024 p1r, p2r = revlog.rev(p1), revlog.rev(p2)
1023 p1r, p2r = revlog.rev(p1), revlog.rev(p2)
1025 groups = _candidategroups(self.revlog, revinfo.textlen,
1024 groups = _candidategroups(self.revlog, revinfo.textlen,
1026 p1r, p2r, cachedelta)
1025 p1r, p2r, cachedelta)
1027 candidaterevs = next(groups)
1026 candidaterevs = next(groups)
1028 while candidaterevs is not None:
1027 while candidaterevs is not None:
1029 nominateddeltas = []
1028 nominateddeltas = []
1030 if deltainfo is not None:
1029 if deltainfo is not None:
1031 # if we already found a good delta,
1030 # if we already found a good delta,
1032 # challenge it against refined candidates
1031 # challenge it against refined candidates
1033 nominateddeltas.append(deltainfo)
1032 nominateddeltas.append(deltainfo)
1034 for candidaterev in candidaterevs:
1033 for candidaterev in candidaterevs:
1035 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
1034 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
1036 if candidatedelta is not None:
1035 if candidatedelta is not None:
1037 if isgooddeltainfo(self.revlog, candidatedelta, revinfo):
1036 if isgooddeltainfo(self.revlog, candidatedelta, revinfo):
1038 nominateddeltas.append(candidatedelta)
1037 nominateddeltas.append(candidatedelta)
1039 if nominateddeltas:
1038 if nominateddeltas:
1040 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
1039 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
1041 if deltainfo is not None:
1040 if deltainfo is not None:
1042 candidaterevs = groups.send(deltainfo.base)
1041 candidaterevs = groups.send(deltainfo.base)
1043 else:
1042 else:
1044 candidaterevs = next(groups)
1043 candidaterevs = next(groups)
1045
1044
1046 if deltainfo is None:
1045 if deltainfo is None:
1047 deltainfo = self._fullsnapshotinfo(fh, revinfo)
1046 deltainfo = self._fullsnapshotinfo(fh, revinfo)
1048 return deltainfo
1047 return deltainfo
General Comments 0
You need to be logged in to leave comments. Login now