##// END OF EJS Templates
deltas: at a `target_rev` parameter to finddeltainfo...
marmoute -
r48249:93f4e183 default
parent child Browse files
Show More
@@ -1,1143 +1,1147
1 # revlogdeltas.py - Logic around delta computation for revlog
1 # revlogdeltas.py - Logic around delta computation for revlog
2 #
2 #
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 # Copyright 2018 Octobus <contact@octobus.net>
4 # Copyright 2018 Octobus <contact@octobus.net>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8 """Helper class to compute deltas stored inside revlogs"""
8 """Helper class to compute deltas stored inside revlogs"""
9
9
10 from __future__ import absolute_import
10 from __future__ import absolute_import
11
11
12 import collections
12 import collections
13 import struct
13 import struct
14
14
15 # import stuff from node for others to import from revlog
15 # import stuff from node for others to import from revlog
16 from ..node import nullrev
16 from ..node import nullrev
17 from ..i18n import _
17 from ..i18n import _
18 from ..pycompat import getattr
18 from ..pycompat import getattr
19
19
20 from .constants import (
20 from .constants import (
21 COMP_MODE_DEFAULT,
21 COMP_MODE_DEFAULT,
22 COMP_MODE_INLINE,
22 COMP_MODE_INLINE,
23 COMP_MODE_PLAIN,
23 COMP_MODE_PLAIN,
24 REVIDX_ISCENSORED,
24 REVIDX_ISCENSORED,
25 REVIDX_RAWTEXT_CHANGING_FLAGS,
25 REVIDX_RAWTEXT_CHANGING_FLAGS,
26 )
26 )
27
27
28 from ..thirdparty import attr
28 from ..thirdparty import attr
29
29
30 from .. import (
30 from .. import (
31 error,
31 error,
32 mdiff,
32 mdiff,
33 util,
33 util,
34 )
34 )
35
35
36 from . import flagutil
36 from . import flagutil
37
37
38 # maximum <delta-chain-data>/<revision-text-length> ratio
38 # maximum <delta-chain-data>/<revision-text-length> ratio
39 LIMIT_DELTA2TEXT = 2
39 LIMIT_DELTA2TEXT = 2
40
40
41
41
42 class _testrevlog(object):
42 class _testrevlog(object):
43 """minimalist fake revlog to use in doctests"""
43 """minimalist fake revlog to use in doctests"""
44
44
45 def __init__(self, data, density=0.5, mingap=0, snapshot=()):
45 def __init__(self, data, density=0.5, mingap=0, snapshot=()):
46 """data is an list of revision payload boundaries"""
46 """data is an list of revision payload boundaries"""
47 self._data = data
47 self._data = data
48 self._srdensitythreshold = density
48 self._srdensitythreshold = density
49 self._srmingapsize = mingap
49 self._srmingapsize = mingap
50 self._snapshot = set(snapshot)
50 self._snapshot = set(snapshot)
51 self.index = None
51 self.index = None
52
52
53 def start(self, rev):
53 def start(self, rev):
54 if rev == nullrev:
54 if rev == nullrev:
55 return 0
55 return 0
56 if rev == 0:
56 if rev == 0:
57 return 0
57 return 0
58 return self._data[rev - 1]
58 return self._data[rev - 1]
59
59
60 def end(self, rev):
60 def end(self, rev):
61 if rev == nullrev:
61 if rev == nullrev:
62 return 0
62 return 0
63 return self._data[rev]
63 return self._data[rev]
64
64
65 def length(self, rev):
65 def length(self, rev):
66 return self.end(rev) - self.start(rev)
66 return self.end(rev) - self.start(rev)
67
67
68 def __len__(self):
68 def __len__(self):
69 return len(self._data)
69 return len(self._data)
70
70
71 def issnapshot(self, rev):
71 def issnapshot(self, rev):
72 if rev == nullrev:
72 if rev == nullrev:
73 return True
73 return True
74 return rev in self._snapshot
74 return rev in self._snapshot
75
75
76
76
77 def slicechunk(revlog, revs, targetsize=None):
77 def slicechunk(revlog, revs, targetsize=None):
78 """slice revs to reduce the amount of unrelated data to be read from disk.
78 """slice revs to reduce the amount of unrelated data to be read from disk.
79
79
80 ``revs`` is sliced into groups that should be read in one time.
80 ``revs`` is sliced into groups that should be read in one time.
81 Assume that revs are sorted.
81 Assume that revs are sorted.
82
82
83 The initial chunk is sliced until the overall density (payload/chunks-span
83 The initial chunk is sliced until the overall density (payload/chunks-span
84 ratio) is above `revlog._srdensitythreshold`. No gap smaller than
84 ratio) is above `revlog._srdensitythreshold`. No gap smaller than
85 `revlog._srmingapsize` is skipped.
85 `revlog._srmingapsize` is skipped.
86
86
87 If `targetsize` is set, no chunk larger than `targetsize` will be yield.
87 If `targetsize` is set, no chunk larger than `targetsize` will be yield.
88 For consistency with other slicing choice, this limit won't go lower than
88 For consistency with other slicing choice, this limit won't go lower than
89 `revlog._srmingapsize`.
89 `revlog._srmingapsize`.
90
90
91 If individual revisions chunk are larger than this limit, they will still
91 If individual revisions chunk are larger than this limit, they will still
92 be raised individually.
92 be raised individually.
93
93
94 >>> data = [
94 >>> data = [
95 ... 5, #00 (5)
95 ... 5, #00 (5)
96 ... 10, #01 (5)
96 ... 10, #01 (5)
97 ... 12, #02 (2)
97 ... 12, #02 (2)
98 ... 12, #03 (empty)
98 ... 12, #03 (empty)
99 ... 27, #04 (15)
99 ... 27, #04 (15)
100 ... 31, #05 (4)
100 ... 31, #05 (4)
101 ... 31, #06 (empty)
101 ... 31, #06 (empty)
102 ... 42, #07 (11)
102 ... 42, #07 (11)
103 ... 47, #08 (5)
103 ... 47, #08 (5)
104 ... 47, #09 (empty)
104 ... 47, #09 (empty)
105 ... 48, #10 (1)
105 ... 48, #10 (1)
106 ... 51, #11 (3)
106 ... 51, #11 (3)
107 ... 74, #12 (23)
107 ... 74, #12 (23)
108 ... 85, #13 (11)
108 ... 85, #13 (11)
109 ... 86, #14 (1)
109 ... 86, #14 (1)
110 ... 91, #15 (5)
110 ... 91, #15 (5)
111 ... ]
111 ... ]
112 >>> revlog = _testrevlog(data, snapshot=range(16))
112 >>> revlog = _testrevlog(data, snapshot=range(16))
113
113
114 >>> list(slicechunk(revlog, list(range(16))))
114 >>> list(slicechunk(revlog, list(range(16))))
115 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
115 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
116 >>> list(slicechunk(revlog, [0, 15]))
116 >>> list(slicechunk(revlog, [0, 15]))
117 [[0], [15]]
117 [[0], [15]]
118 >>> list(slicechunk(revlog, [0, 11, 15]))
118 >>> list(slicechunk(revlog, [0, 11, 15]))
119 [[0], [11], [15]]
119 [[0], [11], [15]]
120 >>> list(slicechunk(revlog, [0, 11, 13, 15]))
120 >>> list(slicechunk(revlog, [0, 11, 13, 15]))
121 [[0], [11, 13, 15]]
121 [[0], [11, 13, 15]]
122 >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
122 >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
123 [[1, 2], [5, 8, 10, 11], [14]]
123 [[1, 2], [5, 8, 10, 11], [14]]
124
124
125 Slicing with a maximum chunk size
125 Slicing with a maximum chunk size
126 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
126 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
127 [[0], [11], [13], [15]]
127 [[0], [11], [13], [15]]
128 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
128 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
129 [[0], [11], [13, 15]]
129 [[0], [11], [13, 15]]
130
130
131 Slicing involving nullrev
131 Slicing involving nullrev
132 >>> list(slicechunk(revlog, [-1, 0, 11, 13, 15], targetsize=20))
132 >>> list(slicechunk(revlog, [-1, 0, 11, 13, 15], targetsize=20))
133 [[-1, 0], [11], [13, 15]]
133 [[-1, 0], [11], [13, 15]]
134 >>> list(slicechunk(revlog, [-1, 13, 15], targetsize=5))
134 >>> list(slicechunk(revlog, [-1, 13, 15], targetsize=5))
135 [[-1], [13], [15]]
135 [[-1], [13], [15]]
136 """
136 """
137 if targetsize is not None:
137 if targetsize is not None:
138 targetsize = max(targetsize, revlog._srmingapsize)
138 targetsize = max(targetsize, revlog._srmingapsize)
139 # targetsize should not be specified when evaluating delta candidates:
139 # targetsize should not be specified when evaluating delta candidates:
140 # * targetsize is used to ensure we stay within specification when reading,
140 # * targetsize is used to ensure we stay within specification when reading,
141 densityslicing = getattr(revlog.index, 'slicechunktodensity', None)
141 densityslicing = getattr(revlog.index, 'slicechunktodensity', None)
142 if densityslicing is None:
142 if densityslicing is None:
143 densityslicing = lambda x, y, z: _slicechunktodensity(revlog, x, y, z)
143 densityslicing = lambda x, y, z: _slicechunktodensity(revlog, x, y, z)
144 for chunk in densityslicing(
144 for chunk in densityslicing(
145 revs, revlog._srdensitythreshold, revlog._srmingapsize
145 revs, revlog._srdensitythreshold, revlog._srmingapsize
146 ):
146 ):
147 for subchunk in _slicechunktosize(revlog, chunk, targetsize):
147 for subchunk in _slicechunktosize(revlog, chunk, targetsize):
148 yield subchunk
148 yield subchunk
149
149
150
150
151 def _slicechunktosize(revlog, revs, targetsize=None):
151 def _slicechunktosize(revlog, revs, targetsize=None):
152 """slice revs to match the target size
152 """slice revs to match the target size
153
153
154 This is intended to be used on chunk that density slicing selected by that
154 This is intended to be used on chunk that density slicing selected by that
155 are still too large compared to the read garantee of revlog. This might
155 are still too large compared to the read garantee of revlog. This might
156 happens when "minimal gap size" interrupted the slicing or when chain are
156 happens when "minimal gap size" interrupted the slicing or when chain are
157 built in a way that create large blocks next to each other.
157 built in a way that create large blocks next to each other.
158
158
159 >>> data = [
159 >>> data = [
160 ... 3, #0 (3)
160 ... 3, #0 (3)
161 ... 5, #1 (2)
161 ... 5, #1 (2)
162 ... 6, #2 (1)
162 ... 6, #2 (1)
163 ... 8, #3 (2)
163 ... 8, #3 (2)
164 ... 8, #4 (empty)
164 ... 8, #4 (empty)
165 ... 11, #5 (3)
165 ... 11, #5 (3)
166 ... 12, #6 (1)
166 ... 12, #6 (1)
167 ... 13, #7 (1)
167 ... 13, #7 (1)
168 ... 14, #8 (1)
168 ... 14, #8 (1)
169 ... ]
169 ... ]
170
170
171 == All snapshots cases ==
171 == All snapshots cases ==
172 >>> revlog = _testrevlog(data, snapshot=range(9))
172 >>> revlog = _testrevlog(data, snapshot=range(9))
173
173
174 Cases where chunk is already small enough
174 Cases where chunk is already small enough
175 >>> list(_slicechunktosize(revlog, [0], 3))
175 >>> list(_slicechunktosize(revlog, [0], 3))
176 [[0]]
176 [[0]]
177 >>> list(_slicechunktosize(revlog, [6, 7], 3))
177 >>> list(_slicechunktosize(revlog, [6, 7], 3))
178 [[6, 7]]
178 [[6, 7]]
179 >>> list(_slicechunktosize(revlog, [0], None))
179 >>> list(_slicechunktosize(revlog, [0], None))
180 [[0]]
180 [[0]]
181 >>> list(_slicechunktosize(revlog, [6, 7], None))
181 >>> list(_slicechunktosize(revlog, [6, 7], None))
182 [[6, 7]]
182 [[6, 7]]
183
183
184 cases where we need actual slicing
184 cases where we need actual slicing
185 >>> list(_slicechunktosize(revlog, [0, 1], 3))
185 >>> list(_slicechunktosize(revlog, [0, 1], 3))
186 [[0], [1]]
186 [[0], [1]]
187 >>> list(_slicechunktosize(revlog, [1, 3], 3))
187 >>> list(_slicechunktosize(revlog, [1, 3], 3))
188 [[1], [3]]
188 [[1], [3]]
189 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
189 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
190 [[1, 2], [3]]
190 [[1, 2], [3]]
191 >>> list(_slicechunktosize(revlog, [3, 5], 3))
191 >>> list(_slicechunktosize(revlog, [3, 5], 3))
192 [[3], [5]]
192 [[3], [5]]
193 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
193 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
194 [[3], [5]]
194 [[3], [5]]
195 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
195 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
196 [[5], [6, 7, 8]]
196 [[5], [6, 7, 8]]
197 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
197 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
198 [[0], [1, 2], [3], [5], [6, 7, 8]]
198 [[0], [1, 2], [3], [5], [6, 7, 8]]
199
199
200 Case with too large individual chunk (must return valid chunk)
200 Case with too large individual chunk (must return valid chunk)
201 >>> list(_slicechunktosize(revlog, [0, 1], 2))
201 >>> list(_slicechunktosize(revlog, [0, 1], 2))
202 [[0], [1]]
202 [[0], [1]]
203 >>> list(_slicechunktosize(revlog, [1, 3], 1))
203 >>> list(_slicechunktosize(revlog, [1, 3], 1))
204 [[1], [3]]
204 [[1], [3]]
205 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
205 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
206 [[3], [5]]
206 [[3], [5]]
207
207
208 == No Snapshot cases ==
208 == No Snapshot cases ==
209 >>> revlog = _testrevlog(data)
209 >>> revlog = _testrevlog(data)
210
210
211 Cases where chunk is already small enough
211 Cases where chunk is already small enough
212 >>> list(_slicechunktosize(revlog, [0], 3))
212 >>> list(_slicechunktosize(revlog, [0], 3))
213 [[0]]
213 [[0]]
214 >>> list(_slicechunktosize(revlog, [6, 7], 3))
214 >>> list(_slicechunktosize(revlog, [6, 7], 3))
215 [[6, 7]]
215 [[6, 7]]
216 >>> list(_slicechunktosize(revlog, [0], None))
216 >>> list(_slicechunktosize(revlog, [0], None))
217 [[0]]
217 [[0]]
218 >>> list(_slicechunktosize(revlog, [6, 7], None))
218 >>> list(_slicechunktosize(revlog, [6, 7], None))
219 [[6, 7]]
219 [[6, 7]]
220
220
221 cases where we need actual slicing
221 cases where we need actual slicing
222 >>> list(_slicechunktosize(revlog, [0, 1], 3))
222 >>> list(_slicechunktosize(revlog, [0, 1], 3))
223 [[0], [1]]
223 [[0], [1]]
224 >>> list(_slicechunktosize(revlog, [1, 3], 3))
224 >>> list(_slicechunktosize(revlog, [1, 3], 3))
225 [[1], [3]]
225 [[1], [3]]
226 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
226 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
227 [[1], [2, 3]]
227 [[1], [2, 3]]
228 >>> list(_slicechunktosize(revlog, [3, 5], 3))
228 >>> list(_slicechunktosize(revlog, [3, 5], 3))
229 [[3], [5]]
229 [[3], [5]]
230 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
230 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
231 [[3], [4, 5]]
231 [[3], [4, 5]]
232 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
232 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
233 [[5], [6, 7, 8]]
233 [[5], [6, 7, 8]]
234 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
234 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
235 [[0], [1, 2], [3], [5], [6, 7, 8]]
235 [[0], [1, 2], [3], [5], [6, 7, 8]]
236
236
237 Case with too large individual chunk (must return valid chunk)
237 Case with too large individual chunk (must return valid chunk)
238 >>> list(_slicechunktosize(revlog, [0, 1], 2))
238 >>> list(_slicechunktosize(revlog, [0, 1], 2))
239 [[0], [1]]
239 [[0], [1]]
240 >>> list(_slicechunktosize(revlog, [1, 3], 1))
240 >>> list(_slicechunktosize(revlog, [1, 3], 1))
241 [[1], [3]]
241 [[1], [3]]
242 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
242 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
243 [[3], [5]]
243 [[3], [5]]
244
244
245 == mixed case ==
245 == mixed case ==
246 >>> revlog = _testrevlog(data, snapshot=[0, 1, 2])
246 >>> revlog = _testrevlog(data, snapshot=[0, 1, 2])
247 >>> list(_slicechunktosize(revlog, list(range(9)), 5))
247 >>> list(_slicechunktosize(revlog, list(range(9)), 5))
248 [[0, 1], [2], [3, 4, 5], [6, 7, 8]]
248 [[0, 1], [2], [3, 4, 5], [6, 7, 8]]
249 """
249 """
250 assert targetsize is None or 0 <= targetsize
250 assert targetsize is None or 0 <= targetsize
251 startdata = revlog.start(revs[0])
251 startdata = revlog.start(revs[0])
252 enddata = revlog.end(revs[-1])
252 enddata = revlog.end(revs[-1])
253 fullspan = enddata - startdata
253 fullspan = enddata - startdata
254 if targetsize is None or fullspan <= targetsize:
254 if targetsize is None or fullspan <= targetsize:
255 yield revs
255 yield revs
256 return
256 return
257
257
258 startrevidx = 0
258 startrevidx = 0
259 endrevidx = 1
259 endrevidx = 1
260 iterrevs = enumerate(revs)
260 iterrevs = enumerate(revs)
261 next(iterrevs) # skip first rev.
261 next(iterrevs) # skip first rev.
262 # first step: get snapshots out of the way
262 # first step: get snapshots out of the way
263 for idx, r in iterrevs:
263 for idx, r in iterrevs:
264 span = revlog.end(r) - startdata
264 span = revlog.end(r) - startdata
265 snapshot = revlog.issnapshot(r)
265 snapshot = revlog.issnapshot(r)
266 if span <= targetsize and snapshot:
266 if span <= targetsize and snapshot:
267 endrevidx = idx + 1
267 endrevidx = idx + 1
268 else:
268 else:
269 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
269 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
270 if chunk:
270 if chunk:
271 yield chunk
271 yield chunk
272 startrevidx = idx
272 startrevidx = idx
273 startdata = revlog.start(r)
273 startdata = revlog.start(r)
274 endrevidx = idx + 1
274 endrevidx = idx + 1
275 if not snapshot:
275 if not snapshot:
276 break
276 break
277
277
278 # for the others, we use binary slicing to quickly converge toward valid
278 # for the others, we use binary slicing to quickly converge toward valid
279 # chunks (otherwise, we might end up looking for start/end of many
279 # chunks (otherwise, we might end up looking for start/end of many
280 # revisions). This logic is not looking for the perfect slicing point, it
280 # revisions). This logic is not looking for the perfect slicing point, it
281 # focuses on quickly converging toward valid chunks.
281 # focuses on quickly converging toward valid chunks.
282 nbitem = len(revs)
282 nbitem = len(revs)
283 while (enddata - startdata) > targetsize:
283 while (enddata - startdata) > targetsize:
284 endrevidx = nbitem
284 endrevidx = nbitem
285 if nbitem - startrevidx <= 1:
285 if nbitem - startrevidx <= 1:
286 break # protect against individual chunk larger than limit
286 break # protect against individual chunk larger than limit
287 localenddata = revlog.end(revs[endrevidx - 1])
287 localenddata = revlog.end(revs[endrevidx - 1])
288 span = localenddata - startdata
288 span = localenddata - startdata
289 while span > targetsize:
289 while span > targetsize:
290 if endrevidx - startrevidx <= 1:
290 if endrevidx - startrevidx <= 1:
291 break # protect against individual chunk larger than limit
291 break # protect against individual chunk larger than limit
292 endrevidx -= (endrevidx - startrevidx) // 2
292 endrevidx -= (endrevidx - startrevidx) // 2
293 localenddata = revlog.end(revs[endrevidx - 1])
293 localenddata = revlog.end(revs[endrevidx - 1])
294 span = localenddata - startdata
294 span = localenddata - startdata
295 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
295 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
296 if chunk:
296 if chunk:
297 yield chunk
297 yield chunk
298 startrevidx = endrevidx
298 startrevidx = endrevidx
299 startdata = revlog.start(revs[startrevidx])
299 startdata = revlog.start(revs[startrevidx])
300
300
301 chunk = _trimchunk(revlog, revs, startrevidx)
301 chunk = _trimchunk(revlog, revs, startrevidx)
302 if chunk:
302 if chunk:
303 yield chunk
303 yield chunk
304
304
305
305
306 def _slicechunktodensity(revlog, revs, targetdensity=0.5, mingapsize=0):
306 def _slicechunktodensity(revlog, revs, targetdensity=0.5, mingapsize=0):
307 """slice revs to reduce the amount of unrelated data to be read from disk.
307 """slice revs to reduce the amount of unrelated data to be read from disk.
308
308
309 ``revs`` is sliced into groups that should be read in one time.
309 ``revs`` is sliced into groups that should be read in one time.
310 Assume that revs are sorted.
310 Assume that revs are sorted.
311
311
312 The initial chunk is sliced until the overall density (payload/chunks-span
312 The initial chunk is sliced until the overall density (payload/chunks-span
313 ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
313 ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
314 skipped.
314 skipped.
315
315
316 >>> revlog = _testrevlog([
316 >>> revlog = _testrevlog([
317 ... 5, #00 (5)
317 ... 5, #00 (5)
318 ... 10, #01 (5)
318 ... 10, #01 (5)
319 ... 12, #02 (2)
319 ... 12, #02 (2)
320 ... 12, #03 (empty)
320 ... 12, #03 (empty)
321 ... 27, #04 (15)
321 ... 27, #04 (15)
322 ... 31, #05 (4)
322 ... 31, #05 (4)
323 ... 31, #06 (empty)
323 ... 31, #06 (empty)
324 ... 42, #07 (11)
324 ... 42, #07 (11)
325 ... 47, #08 (5)
325 ... 47, #08 (5)
326 ... 47, #09 (empty)
326 ... 47, #09 (empty)
327 ... 48, #10 (1)
327 ... 48, #10 (1)
328 ... 51, #11 (3)
328 ... 51, #11 (3)
329 ... 74, #12 (23)
329 ... 74, #12 (23)
330 ... 85, #13 (11)
330 ... 85, #13 (11)
331 ... 86, #14 (1)
331 ... 86, #14 (1)
332 ... 91, #15 (5)
332 ... 91, #15 (5)
333 ... ])
333 ... ])
334
334
335 >>> list(_slicechunktodensity(revlog, list(range(16))))
335 >>> list(_slicechunktodensity(revlog, list(range(16))))
336 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
336 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
337 >>> list(_slicechunktodensity(revlog, [0, 15]))
337 >>> list(_slicechunktodensity(revlog, [0, 15]))
338 [[0], [15]]
338 [[0], [15]]
339 >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
339 >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
340 [[0], [11], [15]]
340 [[0], [11], [15]]
341 >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
341 >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
342 [[0], [11, 13, 15]]
342 [[0], [11, 13, 15]]
343 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
343 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
344 [[1, 2], [5, 8, 10, 11], [14]]
344 [[1, 2], [5, 8, 10, 11], [14]]
345 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
345 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
346 ... mingapsize=20))
346 ... mingapsize=20))
347 [[1, 2, 3, 5, 8, 10, 11], [14]]
347 [[1, 2, 3, 5, 8, 10, 11], [14]]
348 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
348 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
349 ... targetdensity=0.95))
349 ... targetdensity=0.95))
350 [[1, 2], [5], [8, 10, 11], [14]]
350 [[1, 2], [5], [8, 10, 11], [14]]
351 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
351 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
352 ... targetdensity=0.95, mingapsize=12))
352 ... targetdensity=0.95, mingapsize=12))
353 [[1, 2], [5, 8, 10, 11], [14]]
353 [[1, 2], [5, 8, 10, 11], [14]]
354 """
354 """
355 start = revlog.start
355 start = revlog.start
356 length = revlog.length
356 length = revlog.length
357
357
358 if len(revs) <= 1:
358 if len(revs) <= 1:
359 yield revs
359 yield revs
360 return
360 return
361
361
362 deltachainspan = segmentspan(revlog, revs)
362 deltachainspan = segmentspan(revlog, revs)
363
363
364 if deltachainspan < mingapsize:
364 if deltachainspan < mingapsize:
365 yield revs
365 yield revs
366 return
366 return
367
367
368 readdata = deltachainspan
368 readdata = deltachainspan
369 chainpayload = sum(length(r) for r in revs)
369 chainpayload = sum(length(r) for r in revs)
370
370
371 if deltachainspan:
371 if deltachainspan:
372 density = chainpayload / float(deltachainspan)
372 density = chainpayload / float(deltachainspan)
373 else:
373 else:
374 density = 1.0
374 density = 1.0
375
375
376 if density >= targetdensity:
376 if density >= targetdensity:
377 yield revs
377 yield revs
378 return
378 return
379
379
380 # Store the gaps in a heap to have them sorted by decreasing size
380 # Store the gaps in a heap to have them sorted by decreasing size
381 gaps = []
381 gaps = []
382 prevend = None
382 prevend = None
383 for i, rev in enumerate(revs):
383 for i, rev in enumerate(revs):
384 revstart = start(rev)
384 revstart = start(rev)
385 revlen = length(rev)
385 revlen = length(rev)
386
386
387 # Skip empty revisions to form larger holes
387 # Skip empty revisions to form larger holes
388 if revlen == 0:
388 if revlen == 0:
389 continue
389 continue
390
390
391 if prevend is not None:
391 if prevend is not None:
392 gapsize = revstart - prevend
392 gapsize = revstart - prevend
393 # only consider holes that are large enough
393 # only consider holes that are large enough
394 if gapsize > mingapsize:
394 if gapsize > mingapsize:
395 gaps.append((gapsize, i))
395 gaps.append((gapsize, i))
396
396
397 prevend = revstart + revlen
397 prevend = revstart + revlen
398 # sort the gaps to pop them from largest to small
398 # sort the gaps to pop them from largest to small
399 gaps.sort()
399 gaps.sort()
400
400
401 # Collect the indices of the largest holes until the density is acceptable
401 # Collect the indices of the largest holes until the density is acceptable
402 selected = []
402 selected = []
403 while gaps and density < targetdensity:
403 while gaps and density < targetdensity:
404 gapsize, gapidx = gaps.pop()
404 gapsize, gapidx = gaps.pop()
405
405
406 selected.append(gapidx)
406 selected.append(gapidx)
407
407
408 # the gap sizes are stored as negatives to be sorted decreasingly
408 # the gap sizes are stored as negatives to be sorted decreasingly
409 # by the heap
409 # by the heap
410 readdata -= gapsize
410 readdata -= gapsize
411 if readdata > 0:
411 if readdata > 0:
412 density = chainpayload / float(readdata)
412 density = chainpayload / float(readdata)
413 else:
413 else:
414 density = 1.0
414 density = 1.0
415 selected.sort()
415 selected.sort()
416
416
417 # Cut the revs at collected indices
417 # Cut the revs at collected indices
418 previdx = 0
418 previdx = 0
419 for idx in selected:
419 for idx in selected:
420
420
421 chunk = _trimchunk(revlog, revs, previdx, idx)
421 chunk = _trimchunk(revlog, revs, previdx, idx)
422 if chunk:
422 if chunk:
423 yield chunk
423 yield chunk
424
424
425 previdx = idx
425 previdx = idx
426
426
427 chunk = _trimchunk(revlog, revs, previdx)
427 chunk = _trimchunk(revlog, revs, previdx)
428 if chunk:
428 if chunk:
429 yield chunk
429 yield chunk
430
430
431
431
432 def _trimchunk(revlog, revs, startidx, endidx=None):
432 def _trimchunk(revlog, revs, startidx, endidx=None):
433 """returns revs[startidx:endidx] without empty trailing revs
433 """returns revs[startidx:endidx] without empty trailing revs
434
434
435 Doctest Setup
435 Doctest Setup
436 >>> revlog = _testrevlog([
436 >>> revlog = _testrevlog([
437 ... 5, #0
437 ... 5, #0
438 ... 10, #1
438 ... 10, #1
439 ... 12, #2
439 ... 12, #2
440 ... 12, #3 (empty)
440 ... 12, #3 (empty)
441 ... 17, #4
441 ... 17, #4
442 ... 21, #5
442 ... 21, #5
443 ... 21, #6 (empty)
443 ... 21, #6 (empty)
444 ... ])
444 ... ])
445
445
446 Contiguous cases:
446 Contiguous cases:
447 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
447 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
448 [0, 1, 2, 3, 4, 5]
448 [0, 1, 2, 3, 4, 5]
449 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
449 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
450 [0, 1, 2, 3, 4]
450 [0, 1, 2, 3, 4]
451 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
451 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
452 [0, 1, 2]
452 [0, 1, 2]
453 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
453 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
454 [2]
454 [2]
455 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
455 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
456 [3, 4, 5]
456 [3, 4, 5]
457 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
457 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
458 [3, 4]
458 [3, 4]
459
459
460 Discontiguous cases:
460 Discontiguous cases:
461 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
461 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
462 [1, 3, 5]
462 [1, 3, 5]
463 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
463 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
464 [1]
464 [1]
465 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
465 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
466 [3, 5]
466 [3, 5]
467 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
467 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
468 [3, 5]
468 [3, 5]
469 """
469 """
470 length = revlog.length
470 length = revlog.length
471
471
472 if endidx is None:
472 if endidx is None:
473 endidx = len(revs)
473 endidx = len(revs)
474
474
475 # If we have a non-emtpy delta candidate, there are nothing to trim
475 # If we have a non-emtpy delta candidate, there are nothing to trim
476 if revs[endidx - 1] < len(revlog):
476 if revs[endidx - 1] < len(revlog):
477 # Trim empty revs at the end, except the very first revision of a chain
477 # Trim empty revs at the end, except the very first revision of a chain
478 while (
478 while (
479 endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0
479 endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0
480 ):
480 ):
481 endidx -= 1
481 endidx -= 1
482
482
483 return revs[startidx:endidx]
483 return revs[startidx:endidx]
484
484
485
485
486 def segmentspan(revlog, revs):
486 def segmentspan(revlog, revs):
487 """Get the byte span of a segment of revisions
487 """Get the byte span of a segment of revisions
488
488
489 revs is a sorted array of revision numbers
489 revs is a sorted array of revision numbers
490
490
491 >>> revlog = _testrevlog([
491 >>> revlog = _testrevlog([
492 ... 5, #0
492 ... 5, #0
493 ... 10, #1
493 ... 10, #1
494 ... 12, #2
494 ... 12, #2
495 ... 12, #3 (empty)
495 ... 12, #3 (empty)
496 ... 17, #4
496 ... 17, #4
497 ... ])
497 ... ])
498
498
499 >>> segmentspan(revlog, [0, 1, 2, 3, 4])
499 >>> segmentspan(revlog, [0, 1, 2, 3, 4])
500 17
500 17
501 >>> segmentspan(revlog, [0, 4])
501 >>> segmentspan(revlog, [0, 4])
502 17
502 17
503 >>> segmentspan(revlog, [3, 4])
503 >>> segmentspan(revlog, [3, 4])
504 5
504 5
505 >>> segmentspan(revlog, [1, 2, 3,])
505 >>> segmentspan(revlog, [1, 2, 3,])
506 7
506 7
507 >>> segmentspan(revlog, [1, 3])
507 >>> segmentspan(revlog, [1, 3])
508 7
508 7
509 """
509 """
510 if not revs:
510 if not revs:
511 return 0
511 return 0
512 end = revlog.end(revs[-1])
512 end = revlog.end(revs[-1])
513 return end - revlog.start(revs[0])
513 return end - revlog.start(revs[0])
514
514
515
515
516 def _textfromdelta(fh, revlog, baserev, delta, p1, p2, flags, expectednode):
516 def _textfromdelta(fh, revlog, baserev, delta, p1, p2, flags, expectednode):
517 """build full text from a (base, delta) pair and other metadata"""
517 """build full text from a (base, delta) pair and other metadata"""
518 # special case deltas which replace entire base; no need to decode
518 # special case deltas which replace entire base; no need to decode
519 # base revision. this neatly avoids censored bases, which throw when
519 # base revision. this neatly avoids censored bases, which throw when
520 # they're decoded.
520 # they're decoded.
521 hlen = struct.calcsize(b">lll")
521 hlen = struct.calcsize(b">lll")
522 if delta[:hlen] == mdiff.replacediffheader(
522 if delta[:hlen] == mdiff.replacediffheader(
523 revlog.rawsize(baserev), len(delta) - hlen
523 revlog.rawsize(baserev), len(delta) - hlen
524 ):
524 ):
525 fulltext = delta[hlen:]
525 fulltext = delta[hlen:]
526 else:
526 else:
527 # deltabase is rawtext before changed by flag processors, which is
527 # deltabase is rawtext before changed by flag processors, which is
528 # equivalent to non-raw text
528 # equivalent to non-raw text
529 basetext = revlog.revision(baserev, _df=fh, raw=False)
529 basetext = revlog.revision(baserev, _df=fh, raw=False)
530 fulltext = mdiff.patch(basetext, delta)
530 fulltext = mdiff.patch(basetext, delta)
531
531
532 try:
532 try:
533 validatehash = flagutil.processflagsraw(revlog, fulltext, flags)
533 validatehash = flagutil.processflagsraw(revlog, fulltext, flags)
534 if validatehash:
534 if validatehash:
535 revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
535 revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
536 if flags & REVIDX_ISCENSORED:
536 if flags & REVIDX_ISCENSORED:
537 raise error.StorageError(
537 raise error.StorageError(
538 _(b'node %s is not censored') % expectednode
538 _(b'node %s is not censored') % expectednode
539 )
539 )
540 except error.CensoredNodeError:
540 except error.CensoredNodeError:
541 # must pass the censored index flag to add censored revisions
541 # must pass the censored index flag to add censored revisions
542 if not flags & REVIDX_ISCENSORED:
542 if not flags & REVIDX_ISCENSORED:
543 raise
543 raise
544 return fulltext
544 return fulltext
545
545
546
546
547 @attr.s(slots=True, frozen=True)
547 @attr.s(slots=True, frozen=True)
548 class _deltainfo(object):
548 class _deltainfo(object):
549 distance = attr.ib()
549 distance = attr.ib()
550 deltalen = attr.ib()
550 deltalen = attr.ib()
551 data = attr.ib()
551 data = attr.ib()
552 base = attr.ib()
552 base = attr.ib()
553 chainbase = attr.ib()
553 chainbase = attr.ib()
554 chainlen = attr.ib()
554 chainlen = attr.ib()
555 compresseddeltalen = attr.ib()
555 compresseddeltalen = attr.ib()
556 snapshotdepth = attr.ib()
556 snapshotdepth = attr.ib()
557
557
558
558
559 def drop_u_compression(delta):
559 def drop_u_compression(delta):
560 """turn into a "u" (no-compression) into no-compression without header
560 """turn into a "u" (no-compression) into no-compression without header
561
561
562 This is useful for revlog format that has better compression method.
562 This is useful for revlog format that has better compression method.
563 """
563 """
564 assert delta.data[0] == b'u', delta.data[0]
564 assert delta.data[0] == b'u', delta.data[0]
565 return _deltainfo(
565 return _deltainfo(
566 delta.distance,
566 delta.distance,
567 delta.deltalen - 1,
567 delta.deltalen - 1,
568 (b'', delta.data[1]),
568 (b'', delta.data[1]),
569 delta.base,
569 delta.base,
570 delta.chainbase,
570 delta.chainbase,
571 delta.chainlen,
571 delta.chainlen,
572 delta.compresseddeltalen,
572 delta.compresseddeltalen,
573 delta.snapshotdepth,
573 delta.snapshotdepth,
574 )
574 )
575
575
576
576
577 def isgooddeltainfo(revlog, deltainfo, revinfo):
577 def isgooddeltainfo(revlog, deltainfo, revinfo):
578 """Returns True if the given delta is good. Good means that it is within
578 """Returns True if the given delta is good. Good means that it is within
579 the disk span, disk size, and chain length bounds that we know to be
579 the disk span, disk size, and chain length bounds that we know to be
580 performant."""
580 performant."""
581 if deltainfo is None:
581 if deltainfo is None:
582 return False
582 return False
583
583
584 # - 'deltainfo.distance' is the distance from the base revision --
584 # - 'deltainfo.distance' is the distance from the base revision --
585 # bounding it limits the amount of I/O we need to do.
585 # bounding it limits the amount of I/O we need to do.
586 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
586 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
587 # deltas we need to apply -- bounding it limits the amount of CPU
587 # deltas we need to apply -- bounding it limits the amount of CPU
588 # we consume.
588 # we consume.
589
589
590 textlen = revinfo.textlen
590 textlen = revinfo.textlen
591 defaultmax = textlen * 4
591 defaultmax = textlen * 4
592 maxdist = revlog._maxdeltachainspan
592 maxdist = revlog._maxdeltachainspan
593 if not maxdist:
593 if not maxdist:
594 maxdist = deltainfo.distance # ensure the conditional pass
594 maxdist = deltainfo.distance # ensure the conditional pass
595 maxdist = max(maxdist, defaultmax)
595 maxdist = max(maxdist, defaultmax)
596
596
597 # Bad delta from read span:
597 # Bad delta from read span:
598 #
598 #
599 # If the span of data read is larger than the maximum allowed.
599 # If the span of data read is larger than the maximum allowed.
600 #
600 #
601 # In the sparse-revlog case, we rely on the associated "sparse reading"
601 # In the sparse-revlog case, we rely on the associated "sparse reading"
602 # to avoid issue related to the span of data. In theory, it would be
602 # to avoid issue related to the span of data. In theory, it would be
603 # possible to build pathological revlog where delta pattern would lead
603 # possible to build pathological revlog where delta pattern would lead
604 # to too many reads. However, they do not happen in practice at all. So
604 # to too many reads. However, they do not happen in practice at all. So
605 # we skip the span check entirely.
605 # we skip the span check entirely.
606 if not revlog._sparserevlog and maxdist < deltainfo.distance:
606 if not revlog._sparserevlog and maxdist < deltainfo.distance:
607 return False
607 return False
608
608
609 # Bad delta from new delta size:
609 # Bad delta from new delta size:
610 #
610 #
611 # If the delta size is larger than the target text, storing the
611 # If the delta size is larger than the target text, storing the
612 # delta will be inefficient.
612 # delta will be inefficient.
613 if textlen < deltainfo.deltalen:
613 if textlen < deltainfo.deltalen:
614 return False
614 return False
615
615
616 # Bad delta from cumulated payload size:
616 # Bad delta from cumulated payload size:
617 #
617 #
618 # If the sum of delta get larger than K * target text length.
618 # If the sum of delta get larger than K * target text length.
619 if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
619 if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
620 return False
620 return False
621
621
622 # Bad delta from chain length:
622 # Bad delta from chain length:
623 #
623 #
624 # If the number of delta in the chain gets too high.
624 # If the number of delta in the chain gets too high.
625 if revlog._maxchainlen and revlog._maxchainlen < deltainfo.chainlen:
625 if revlog._maxchainlen and revlog._maxchainlen < deltainfo.chainlen:
626 return False
626 return False
627
627
628 # bad delta from intermediate snapshot size limit
628 # bad delta from intermediate snapshot size limit
629 #
629 #
630 # If an intermediate snapshot size is higher than the limit. The
630 # If an intermediate snapshot size is higher than the limit. The
631 # limit exist to prevent endless chain of intermediate delta to be
631 # limit exist to prevent endless chain of intermediate delta to be
632 # created.
632 # created.
633 if (
633 if (
634 deltainfo.snapshotdepth is not None
634 deltainfo.snapshotdepth is not None
635 and (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen
635 and (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen
636 ):
636 ):
637 return False
637 return False
638
638
639 # bad delta if new intermediate snapshot is larger than the previous
639 # bad delta if new intermediate snapshot is larger than the previous
640 # snapshot
640 # snapshot
641 if (
641 if (
642 deltainfo.snapshotdepth
642 deltainfo.snapshotdepth
643 and revlog.length(deltainfo.base) < deltainfo.deltalen
643 and revlog.length(deltainfo.base) < deltainfo.deltalen
644 ):
644 ):
645 return False
645 return False
646
646
647 return True
647 return True
648
648
649
649
650 # If a revision's full text is that much bigger than a base candidate full
650 # If a revision's full text is that much bigger than a base candidate full
651 # text's, it is very unlikely that it will produce a valid delta. We no longer
651 # text's, it is very unlikely that it will produce a valid delta. We no longer
652 # consider these candidates.
652 # consider these candidates.
653 LIMIT_BASE2TEXT = 500
653 LIMIT_BASE2TEXT = 500
654
654
655
655
656 def _candidategroups(revlog, textlen, p1, p2, cachedelta):
656 def _candidategroups(revlog, textlen, p1, p2, cachedelta):
657 """Provides group of revision to be tested as delta base
657 """Provides group of revision to be tested as delta base
658
658
659 This top level function focus on emitting groups with unique and worthwhile
659 This top level function focus on emitting groups with unique and worthwhile
660 content. See _raw_candidate_groups for details about the group order.
660 content. See _raw_candidate_groups for details about the group order.
661 """
661 """
662 # should we try to build a delta?
662 # should we try to build a delta?
663 if not (len(revlog) and revlog._storedeltachains):
663 if not (len(revlog) and revlog._storedeltachains):
664 yield None
664 yield None
665 return
665 return
666
666
667 deltalength = revlog.length
667 deltalength = revlog.length
668 deltaparent = revlog.deltaparent
668 deltaparent = revlog.deltaparent
669 sparse = revlog._sparserevlog
669 sparse = revlog._sparserevlog
670 good = None
670 good = None
671
671
672 deltas_limit = textlen * LIMIT_DELTA2TEXT
672 deltas_limit = textlen * LIMIT_DELTA2TEXT
673
673
674 tested = {nullrev}
674 tested = {nullrev}
675 candidates = _refinedgroups(revlog, p1, p2, cachedelta)
675 candidates = _refinedgroups(revlog, p1, p2, cachedelta)
676 while True:
676 while True:
677 temptative = candidates.send(good)
677 temptative = candidates.send(good)
678 if temptative is None:
678 if temptative is None:
679 break
679 break
680 group = []
680 group = []
681 for rev in temptative:
681 for rev in temptative:
682 # skip over empty delta (no need to include them in a chain)
682 # skip over empty delta (no need to include them in a chain)
683 while revlog._generaldelta and not (
683 while revlog._generaldelta and not (
684 rev == nullrev or rev in tested or deltalength(rev)
684 rev == nullrev or rev in tested or deltalength(rev)
685 ):
685 ):
686 tested.add(rev)
686 tested.add(rev)
687 rev = deltaparent(rev)
687 rev = deltaparent(rev)
688 # no need to try a delta against nullrev, this will be done as a
688 # no need to try a delta against nullrev, this will be done as a
689 # last resort.
689 # last resort.
690 if rev == nullrev:
690 if rev == nullrev:
691 continue
691 continue
692 # filter out revision we tested already
692 # filter out revision we tested already
693 if rev in tested:
693 if rev in tested:
694 continue
694 continue
695 tested.add(rev)
695 tested.add(rev)
696 # filter out delta base that will never produce good delta
696 # filter out delta base that will never produce good delta
697 if deltas_limit < revlog.length(rev):
697 if deltas_limit < revlog.length(rev):
698 continue
698 continue
699 if sparse and revlog.rawsize(rev) < (textlen // LIMIT_BASE2TEXT):
699 if sparse and revlog.rawsize(rev) < (textlen // LIMIT_BASE2TEXT):
700 continue
700 continue
701 # no delta for rawtext-changing revs (see "candelta" for why)
701 # no delta for rawtext-changing revs (see "candelta" for why)
702 if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
702 if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
703 continue
703 continue
704 # If we reach here, we are about to build and test a delta.
704 # If we reach here, we are about to build and test a delta.
705 # The delta building process will compute the chaininfo in all
705 # The delta building process will compute the chaininfo in all
706 # case, since that computation is cached, it is fine to access it
706 # case, since that computation is cached, it is fine to access it
707 # here too.
707 # here too.
708 chainlen, chainsize = revlog._chaininfo(rev)
708 chainlen, chainsize = revlog._chaininfo(rev)
709 # if chain will be too long, skip base
709 # if chain will be too long, skip base
710 if revlog._maxchainlen and chainlen >= revlog._maxchainlen:
710 if revlog._maxchainlen and chainlen >= revlog._maxchainlen:
711 continue
711 continue
712 # if chain already have too much data, skip base
712 # if chain already have too much data, skip base
713 if deltas_limit < chainsize:
713 if deltas_limit < chainsize:
714 continue
714 continue
715 if sparse and revlog.upperboundcomp is not None:
715 if sparse and revlog.upperboundcomp is not None:
716 maxcomp = revlog.upperboundcomp
716 maxcomp = revlog.upperboundcomp
717 basenotsnap = (p1, p2, nullrev)
717 basenotsnap = (p1, p2, nullrev)
718 if rev not in basenotsnap and revlog.issnapshot(rev):
718 if rev not in basenotsnap and revlog.issnapshot(rev):
719 snapshotdepth = revlog.snapshotdepth(rev)
719 snapshotdepth = revlog.snapshotdepth(rev)
720 # If text is significantly larger than the base, we can
720 # If text is significantly larger than the base, we can
721 # expect the resulting delta to be proportional to the size
721 # expect the resulting delta to be proportional to the size
722 # difference
722 # difference
723 revsize = revlog.rawsize(rev)
723 revsize = revlog.rawsize(rev)
724 rawsizedistance = max(textlen - revsize, 0)
724 rawsizedistance = max(textlen - revsize, 0)
725 # use an estimate of the compression upper bound.
725 # use an estimate of the compression upper bound.
726 lowestrealisticdeltalen = rawsizedistance // maxcomp
726 lowestrealisticdeltalen = rawsizedistance // maxcomp
727
727
728 # check the absolute constraint on the delta size
728 # check the absolute constraint on the delta size
729 snapshotlimit = textlen >> snapshotdepth
729 snapshotlimit = textlen >> snapshotdepth
730 if snapshotlimit < lowestrealisticdeltalen:
730 if snapshotlimit < lowestrealisticdeltalen:
731 # delta lower bound is larger than accepted upper bound
731 # delta lower bound is larger than accepted upper bound
732 continue
732 continue
733
733
734 # check the relative constraint on the delta size
734 # check the relative constraint on the delta size
735 revlength = revlog.length(rev)
735 revlength = revlog.length(rev)
736 if revlength < lowestrealisticdeltalen:
736 if revlength < lowestrealisticdeltalen:
737 # delta probable lower bound is larger than target base
737 # delta probable lower bound is larger than target base
738 continue
738 continue
739
739
740 group.append(rev)
740 group.append(rev)
741 if group:
741 if group:
742 # XXX: in the sparse revlog case, group can become large,
742 # XXX: in the sparse revlog case, group can become large,
743 # impacting performances. Some bounding or slicing mecanism
743 # impacting performances. Some bounding or slicing mecanism
744 # would help to reduce this impact.
744 # would help to reduce this impact.
745 good = yield tuple(group)
745 good = yield tuple(group)
746 yield None
746 yield None
747
747
748
748
749 def _findsnapshots(revlog, cache, start_rev):
749 def _findsnapshots(revlog, cache, start_rev):
750 """find snapshot from start_rev to tip"""
750 """find snapshot from start_rev to tip"""
751 if util.safehasattr(revlog.index, b'findsnapshots'):
751 if util.safehasattr(revlog.index, b'findsnapshots'):
752 revlog.index.findsnapshots(cache, start_rev)
752 revlog.index.findsnapshots(cache, start_rev)
753 else:
753 else:
754 deltaparent = revlog.deltaparent
754 deltaparent = revlog.deltaparent
755 issnapshot = revlog.issnapshot
755 issnapshot = revlog.issnapshot
756 for rev in revlog.revs(start_rev):
756 for rev in revlog.revs(start_rev):
757 if issnapshot(rev):
757 if issnapshot(rev):
758 cache[deltaparent(rev)].append(rev)
758 cache[deltaparent(rev)].append(rev)
759
759
760
760
761 def _refinedgroups(revlog, p1, p2, cachedelta):
761 def _refinedgroups(revlog, p1, p2, cachedelta):
762 good = None
762 good = None
763 # First we try to reuse a the delta contained in the bundle.
763 # First we try to reuse a the delta contained in the bundle.
764 # (or from the source revlog)
764 # (or from the source revlog)
765 #
765 #
766 # This logic only applies to general delta repositories and can be disabled
766 # This logic only applies to general delta repositories and can be disabled
767 # through configuration. Disabling reuse source delta is useful when
767 # through configuration. Disabling reuse source delta is useful when
768 # we want to make sure we recomputed "optimal" deltas.
768 # we want to make sure we recomputed "optimal" deltas.
769 if cachedelta and revlog._generaldelta and revlog._lazydeltabase:
769 if cachedelta and revlog._generaldelta and revlog._lazydeltabase:
770 # Assume what we received from the server is a good choice
770 # Assume what we received from the server is a good choice
771 # build delta will reuse the cache
771 # build delta will reuse the cache
772 good = yield (cachedelta[0],)
772 good = yield (cachedelta[0],)
773 if good is not None:
773 if good is not None:
774 yield None
774 yield None
775 return
775 return
776 snapshots = collections.defaultdict(list)
776 snapshots = collections.defaultdict(list)
777 for candidates in _rawgroups(revlog, p1, p2, cachedelta, snapshots):
777 for candidates in _rawgroups(revlog, p1, p2, cachedelta, snapshots):
778 good = yield candidates
778 good = yield candidates
779 if good is not None:
779 if good is not None:
780 break
780 break
781
781
782 # If sparse revlog is enabled, we can try to refine the available deltas
782 # If sparse revlog is enabled, we can try to refine the available deltas
783 if not revlog._sparserevlog:
783 if not revlog._sparserevlog:
784 yield None
784 yield None
785 return
785 return
786
786
787 # if we have a refinable value, try to refine it
787 # if we have a refinable value, try to refine it
788 if good is not None and good not in (p1, p2) and revlog.issnapshot(good):
788 if good is not None and good not in (p1, p2) and revlog.issnapshot(good):
789 # refine snapshot down
789 # refine snapshot down
790 previous = None
790 previous = None
791 while previous != good:
791 while previous != good:
792 previous = good
792 previous = good
793 base = revlog.deltaparent(good)
793 base = revlog.deltaparent(good)
794 if base == nullrev:
794 if base == nullrev:
795 break
795 break
796 good = yield (base,)
796 good = yield (base,)
797 # refine snapshot up
797 # refine snapshot up
798 if not snapshots:
798 if not snapshots:
799 _findsnapshots(revlog, snapshots, good + 1)
799 _findsnapshots(revlog, snapshots, good + 1)
800 previous = None
800 previous = None
801 while good != previous:
801 while good != previous:
802 previous = good
802 previous = good
803 children = tuple(sorted(c for c in snapshots[good]))
803 children = tuple(sorted(c for c in snapshots[good]))
804 good = yield children
804 good = yield children
805
805
806 # we have found nothing
806 # we have found nothing
807 yield None
807 yield None
808
808
809
809
810 def _rawgroups(revlog, p1, p2, cachedelta, snapshots=None):
810 def _rawgroups(revlog, p1, p2, cachedelta, snapshots=None):
811 """Provides group of revision to be tested as delta base
811 """Provides group of revision to be tested as delta base
812
812
813 This lower level function focus on emitting delta theorically interresting
813 This lower level function focus on emitting delta theorically interresting
814 without looking it any practical details.
814 without looking it any practical details.
815
815
816 The group order aims at providing fast or small candidates first.
816 The group order aims at providing fast or small candidates first.
817 """
817 """
818 gdelta = revlog._generaldelta
818 gdelta = revlog._generaldelta
819 # gate sparse behind general-delta because of issue6056
819 # gate sparse behind general-delta because of issue6056
820 sparse = gdelta and revlog._sparserevlog
820 sparse = gdelta and revlog._sparserevlog
821 curr = len(revlog)
821 curr = len(revlog)
822 prev = curr - 1
822 prev = curr - 1
823 deltachain = lambda rev: revlog._deltachain(rev)[0]
823 deltachain = lambda rev: revlog._deltachain(rev)[0]
824
824
825 if gdelta:
825 if gdelta:
826 # exclude already lazy tested base if any
826 # exclude already lazy tested base if any
827 parents = [p for p in (p1, p2) if p != nullrev]
827 parents = [p for p in (p1, p2) if p != nullrev]
828
828
829 if not revlog._deltabothparents and len(parents) == 2:
829 if not revlog._deltabothparents and len(parents) == 2:
830 parents.sort()
830 parents.sort()
831 # To minimize the chance of having to build a fulltext,
831 # To minimize the chance of having to build a fulltext,
832 # pick first whichever parent is closest to us (max rev)
832 # pick first whichever parent is closest to us (max rev)
833 yield (parents[1],)
833 yield (parents[1],)
834 # then the other one (min rev) if the first did not fit
834 # then the other one (min rev) if the first did not fit
835 yield (parents[0],)
835 yield (parents[0],)
836 elif len(parents) > 0:
836 elif len(parents) > 0:
837 # Test all parents (1 or 2), and keep the best candidate
837 # Test all parents (1 or 2), and keep the best candidate
838 yield parents
838 yield parents
839
839
840 if sparse and parents:
840 if sparse and parents:
841 if snapshots is None:
841 if snapshots is None:
842 # map: base-rev: snapshot-rev
842 # map: base-rev: snapshot-rev
843 snapshots = collections.defaultdict(list)
843 snapshots = collections.defaultdict(list)
844 # See if we can use an existing snapshot in the parent chains to use as
844 # See if we can use an existing snapshot in the parent chains to use as
845 # a base for a new intermediate-snapshot
845 # a base for a new intermediate-snapshot
846 #
846 #
847 # search for snapshot in parents delta chain
847 # search for snapshot in parents delta chain
848 # map: snapshot-level: snapshot-rev
848 # map: snapshot-level: snapshot-rev
849 parents_snaps = collections.defaultdict(set)
849 parents_snaps = collections.defaultdict(set)
850 candidate_chains = [deltachain(p) for p in parents]
850 candidate_chains = [deltachain(p) for p in parents]
851 for chain in candidate_chains:
851 for chain in candidate_chains:
852 for idx, s in enumerate(chain):
852 for idx, s in enumerate(chain):
853 if not revlog.issnapshot(s):
853 if not revlog.issnapshot(s):
854 break
854 break
855 parents_snaps[idx].add(s)
855 parents_snaps[idx].add(s)
856 snapfloor = min(parents_snaps[0]) + 1
856 snapfloor = min(parents_snaps[0]) + 1
857 _findsnapshots(revlog, snapshots, snapfloor)
857 _findsnapshots(revlog, snapshots, snapfloor)
858 # search for the highest "unrelated" revision
858 # search for the highest "unrelated" revision
859 #
859 #
860 # Adding snapshots used by "unrelated" revision increase the odd we
860 # Adding snapshots used by "unrelated" revision increase the odd we
861 # reuse an independant, yet better snapshot chain.
861 # reuse an independant, yet better snapshot chain.
862 #
862 #
863 # XXX instead of building a set of revisions, we could lazily enumerate
863 # XXX instead of building a set of revisions, we could lazily enumerate
864 # over the chains. That would be more efficient, however we stick to
864 # over the chains. That would be more efficient, however we stick to
865 # simple code for now.
865 # simple code for now.
866 all_revs = set()
866 all_revs = set()
867 for chain in candidate_chains:
867 for chain in candidate_chains:
868 all_revs.update(chain)
868 all_revs.update(chain)
869 other = None
869 other = None
870 for r in revlog.revs(prev, snapfloor):
870 for r in revlog.revs(prev, snapfloor):
871 if r not in all_revs:
871 if r not in all_revs:
872 other = r
872 other = r
873 break
873 break
874 if other is not None:
874 if other is not None:
875 # To avoid unfair competition, we won't use unrelated intermediate
875 # To avoid unfair competition, we won't use unrelated intermediate
876 # snapshot that are deeper than the ones from the parent delta
876 # snapshot that are deeper than the ones from the parent delta
877 # chain.
877 # chain.
878 max_depth = max(parents_snaps.keys())
878 max_depth = max(parents_snaps.keys())
879 chain = deltachain(other)
879 chain = deltachain(other)
880 for idx, s in enumerate(chain):
880 for idx, s in enumerate(chain):
881 if s < snapfloor:
881 if s < snapfloor:
882 continue
882 continue
883 if max_depth < idx:
883 if max_depth < idx:
884 break
884 break
885 if not revlog.issnapshot(s):
885 if not revlog.issnapshot(s):
886 break
886 break
887 parents_snaps[idx].add(s)
887 parents_snaps[idx].add(s)
888 # Test them as possible intermediate snapshot base
888 # Test them as possible intermediate snapshot base
889 # We test them from highest to lowest level. High level one are more
889 # We test them from highest to lowest level. High level one are more
890 # likely to result in small delta
890 # likely to result in small delta
891 floor = None
891 floor = None
892 for idx, snaps in sorted(parents_snaps.items(), reverse=True):
892 for idx, snaps in sorted(parents_snaps.items(), reverse=True):
893 siblings = set()
893 siblings = set()
894 for s in snaps:
894 for s in snaps:
895 siblings.update(snapshots[s])
895 siblings.update(snapshots[s])
896 # Before considering making a new intermediate snapshot, we check
896 # Before considering making a new intermediate snapshot, we check
897 # if an existing snapshot, children of base we consider, would be
897 # if an existing snapshot, children of base we consider, would be
898 # suitable.
898 # suitable.
899 #
899 #
900 # It give a change to reuse a delta chain "unrelated" to the
900 # It give a change to reuse a delta chain "unrelated" to the
901 # current revision instead of starting our own. Without such
901 # current revision instead of starting our own. Without such
902 # re-use, topological branches would keep reopening new chains.
902 # re-use, topological branches would keep reopening new chains.
903 # Creating more and more snapshot as the repository grow.
903 # Creating more and more snapshot as the repository grow.
904
904
905 if floor is not None:
905 if floor is not None:
906 # We only do this for siblings created after the one in our
906 # We only do this for siblings created after the one in our
907 # parent's delta chain. Those created before has less chances
907 # parent's delta chain. Those created before has less chances
908 # to be valid base since our ancestors had to create a new
908 # to be valid base since our ancestors had to create a new
909 # snapshot.
909 # snapshot.
910 siblings = [r for r in siblings if floor < r]
910 siblings = [r for r in siblings if floor < r]
911 yield tuple(sorted(siblings))
911 yield tuple(sorted(siblings))
912 # then test the base from our parent's delta chain.
912 # then test the base from our parent's delta chain.
913 yield tuple(sorted(snaps))
913 yield tuple(sorted(snaps))
914 floor = min(snaps)
914 floor = min(snaps)
915 # No suitable base found in the parent chain, search if any full
915 # No suitable base found in the parent chain, search if any full
916 # snapshots emitted since parent's base would be a suitable base for an
916 # snapshots emitted since parent's base would be a suitable base for an
917 # intermediate snapshot.
917 # intermediate snapshot.
918 #
918 #
919 # It give a chance to reuse a delta chain unrelated to the current
919 # It give a chance to reuse a delta chain unrelated to the current
920 # revisions instead of starting our own. Without such re-use,
920 # revisions instead of starting our own. Without such re-use,
921 # topological branches would keep reopening new full chains. Creating
921 # topological branches would keep reopening new full chains. Creating
922 # more and more snapshot as the repository grow.
922 # more and more snapshot as the repository grow.
923 yield tuple(snapshots[nullrev])
923 yield tuple(snapshots[nullrev])
924
924
925 if not sparse:
925 if not sparse:
926 # other approach failed try against prev to hopefully save us a
926 # other approach failed try against prev to hopefully save us a
927 # fulltext.
927 # fulltext.
928 yield (prev,)
928 yield (prev,)
929
929
930
930
931 class deltacomputer(object):
931 class deltacomputer(object):
932 def __init__(self, revlog):
932 def __init__(self, revlog):
933 self.revlog = revlog
933 self.revlog = revlog
934
934
935 def buildtext(self, revinfo, fh):
935 def buildtext(self, revinfo, fh):
936 """Builds a fulltext version of a revision
936 """Builds a fulltext version of a revision
937
937
938 revinfo: revisioninfo instance that contains all needed info
938 revinfo: revisioninfo instance that contains all needed info
939 fh: file handle to either the .i or the .d revlog file,
939 fh: file handle to either the .i or the .d revlog file,
940 depending on whether it is inlined or not
940 depending on whether it is inlined or not
941 """
941 """
942 btext = revinfo.btext
942 btext = revinfo.btext
943 if btext[0] is not None:
943 if btext[0] is not None:
944 return btext[0]
944 return btext[0]
945
945
946 revlog = self.revlog
946 revlog = self.revlog
947 cachedelta = revinfo.cachedelta
947 cachedelta = revinfo.cachedelta
948 baserev = cachedelta[0]
948 baserev = cachedelta[0]
949 delta = cachedelta[1]
949 delta = cachedelta[1]
950
950
951 fulltext = btext[0] = _textfromdelta(
951 fulltext = btext[0] = _textfromdelta(
952 fh,
952 fh,
953 revlog,
953 revlog,
954 baserev,
954 baserev,
955 delta,
955 delta,
956 revinfo.p1,
956 revinfo.p1,
957 revinfo.p2,
957 revinfo.p2,
958 revinfo.flags,
958 revinfo.flags,
959 revinfo.node,
959 revinfo.node,
960 )
960 )
961 return fulltext
961 return fulltext
962
962
963 def _builddeltadiff(self, base, revinfo, fh):
963 def _builddeltadiff(self, base, revinfo, fh):
964 revlog = self.revlog
964 revlog = self.revlog
965 t = self.buildtext(revinfo, fh)
965 t = self.buildtext(revinfo, fh)
966 if revlog.iscensored(base):
966 if revlog.iscensored(base):
967 # deltas based on a censored revision must replace the
967 # deltas based on a censored revision must replace the
968 # full content in one patch, so delta works everywhere
968 # full content in one patch, so delta works everywhere
969 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
969 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
970 delta = header + t
970 delta = header + t
971 else:
971 else:
972 ptext = revlog.rawdata(base, _df=fh)
972 ptext = revlog.rawdata(base, _df=fh)
973 delta = mdiff.textdiff(ptext, t)
973 delta = mdiff.textdiff(ptext, t)
974
974
975 return delta
975 return delta
976
976
977 def _builddeltainfo(self, revinfo, base, fh):
977 def _builddeltainfo(self, revinfo, base, fh):
978 # can we use the cached delta?
978 # can we use the cached delta?
979 revlog = self.revlog
979 revlog = self.revlog
980 chainbase = revlog.chainbase(base)
980 chainbase = revlog.chainbase(base)
981 if revlog._generaldelta:
981 if revlog._generaldelta:
982 deltabase = base
982 deltabase = base
983 else:
983 else:
984 deltabase = chainbase
984 deltabase = chainbase
985 snapshotdepth = None
985 snapshotdepth = None
986 if revlog._sparserevlog and deltabase == nullrev:
986 if revlog._sparserevlog and deltabase == nullrev:
987 snapshotdepth = 0
987 snapshotdepth = 0
988 elif revlog._sparserevlog and revlog.issnapshot(deltabase):
988 elif revlog._sparserevlog and revlog.issnapshot(deltabase):
989 # A delta chain should always be one full snapshot,
989 # A delta chain should always be one full snapshot,
990 # zero or more semi-snapshots, and zero or more deltas
990 # zero or more semi-snapshots, and zero or more deltas
991 p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
991 p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
992 if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
992 if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
993 snapshotdepth = len(revlog._deltachain(deltabase)[0])
993 snapshotdepth = len(revlog._deltachain(deltabase)[0])
994 delta = None
994 delta = None
995 if revinfo.cachedelta:
995 if revinfo.cachedelta:
996 cachebase, cachediff = revinfo.cachedelta
996 cachebase, cachediff = revinfo.cachedelta
997 # check if the diff still apply
997 # check if the diff still apply
998 currentbase = cachebase
998 currentbase = cachebase
999 while (
999 while (
1000 currentbase != nullrev
1000 currentbase != nullrev
1001 and currentbase != base
1001 and currentbase != base
1002 and self.revlog.length(currentbase) == 0
1002 and self.revlog.length(currentbase) == 0
1003 ):
1003 ):
1004 currentbase = self.revlog.deltaparent(currentbase)
1004 currentbase = self.revlog.deltaparent(currentbase)
1005 if self.revlog._lazydelta and currentbase == base:
1005 if self.revlog._lazydelta and currentbase == base:
1006 delta = revinfo.cachedelta[1]
1006 delta = revinfo.cachedelta[1]
1007 if delta is None:
1007 if delta is None:
1008 delta = self._builddeltadiff(base, revinfo, fh)
1008 delta = self._builddeltadiff(base, revinfo, fh)
1009 # snapshotdept need to be neither None nor 0 level snapshot
1009 # snapshotdept need to be neither None nor 0 level snapshot
1010 if revlog.upperboundcomp is not None and snapshotdepth:
1010 if revlog.upperboundcomp is not None and snapshotdepth:
1011 lowestrealisticdeltalen = len(delta) // revlog.upperboundcomp
1011 lowestrealisticdeltalen = len(delta) // revlog.upperboundcomp
1012 snapshotlimit = revinfo.textlen >> snapshotdepth
1012 snapshotlimit = revinfo.textlen >> snapshotdepth
1013 if snapshotlimit < lowestrealisticdeltalen:
1013 if snapshotlimit < lowestrealisticdeltalen:
1014 return None
1014 return None
1015 if revlog.length(base) < lowestrealisticdeltalen:
1015 if revlog.length(base) < lowestrealisticdeltalen:
1016 return None
1016 return None
1017 header, data = revlog.compress(delta)
1017 header, data = revlog.compress(delta)
1018 deltalen = len(header) + len(data)
1018 deltalen = len(header) + len(data)
1019 offset = revlog.end(len(revlog) - 1)
1019 offset = revlog.end(len(revlog) - 1)
1020 dist = deltalen + offset - revlog.start(chainbase)
1020 dist = deltalen + offset - revlog.start(chainbase)
1021 chainlen, compresseddeltalen = revlog._chaininfo(base)
1021 chainlen, compresseddeltalen = revlog._chaininfo(base)
1022 chainlen += 1
1022 chainlen += 1
1023 compresseddeltalen += deltalen
1023 compresseddeltalen += deltalen
1024
1024
1025 return _deltainfo(
1025 return _deltainfo(
1026 dist,
1026 dist,
1027 deltalen,
1027 deltalen,
1028 (header, data),
1028 (header, data),
1029 deltabase,
1029 deltabase,
1030 chainbase,
1030 chainbase,
1031 chainlen,
1031 chainlen,
1032 compresseddeltalen,
1032 compresseddeltalen,
1033 snapshotdepth,
1033 snapshotdepth,
1034 )
1034 )
1035
1035
1036 def _fullsnapshotinfo(self, fh, revinfo):
1036 def _fullsnapshotinfo(self, fh, revinfo, curr):
1037 curr = len(self.revlog)
1038 rawtext = self.buildtext(revinfo, fh)
1037 rawtext = self.buildtext(revinfo, fh)
1039 data = self.revlog.compress(rawtext)
1038 data = self.revlog.compress(rawtext)
1040 compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
1039 compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
1041 deltabase = chainbase = curr
1040 deltabase = chainbase = curr
1042 snapshotdepth = 0
1041 snapshotdepth = 0
1043 chainlen = 1
1042 chainlen = 1
1044
1043
1045 return _deltainfo(
1044 return _deltainfo(
1046 dist,
1045 dist,
1047 deltalen,
1046 deltalen,
1048 data,
1047 data,
1049 deltabase,
1048 deltabase,
1050 chainbase,
1049 chainbase,
1051 chainlen,
1050 chainlen,
1052 compresseddeltalen,
1051 compresseddeltalen,
1053 snapshotdepth,
1052 snapshotdepth,
1054 )
1053 )
1055
1054
1056 def finddeltainfo(self, revinfo, fh, excluded_bases=None):
1055 def finddeltainfo(self, revinfo, fh, excluded_bases=None, target_rev=None):
1057 """Find an acceptable delta against a candidate revision
1056 """Find an acceptable delta against a candidate revision
1058
1057
1059 revinfo: information about the revision (instance of _revisioninfo)
1058 revinfo: information about the revision (instance of _revisioninfo)
1060 fh: file handle to either the .i or the .d revlog file,
1059 fh: file handle to either the .i or the .d revlog file,
1061 depending on whether it is inlined or not
1060 depending on whether it is inlined or not
1062
1061
1063 Returns the first acceptable candidate revision, as ordered by
1062 Returns the first acceptable candidate revision, as ordered by
1064 _candidategroups
1063 _candidategroups
1065
1064
1066 If no suitable deltabase is found, we return delta info for a full
1065 If no suitable deltabase is found, we return delta info for a full
1067 snapshot.
1066 snapshot.
1068
1067
1069 `excluded_bases` is an optional set of revision that cannot be used as
1068 `excluded_bases` is an optional set of revision that cannot be used as
1070 a delta base. Use this to recompute delta suitable in censor or strip
1069 a delta base. Use this to recompute delta suitable in censor or strip
1071 context.
1070 context.
1072 """
1071 """
1072 if target_rev is None:
1073 curr = len(self.revlog)
1074
1073 if not revinfo.textlen:
1075 if not revinfo.textlen:
1074 return self._fullsnapshotinfo(fh, revinfo)
1076 return self._fullsnapshotinfo(fh, revinfo, target_rev)
1075
1077
1076 if excluded_bases is None:
1078 if excluded_bases is None:
1077 excluded_bases = set()
1079 excluded_bases = set()
1078
1080
1079 # no delta for flag processor revision (see "candelta" for why)
1081 # no delta for flag processor revision (see "candelta" for why)
1080 # not calling candelta since only one revision needs test, also to
1082 # not calling candelta since only one revision needs test, also to
1081 # avoid overhead fetching flags again.
1083 # avoid overhead fetching flags again.
1082 if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
1084 if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
1083 return self._fullsnapshotinfo(fh, revinfo)
1085 return self._fullsnapshotinfo(fh, revinfo, target_rev)
1084
1086
1085 cachedelta = revinfo.cachedelta
1087 cachedelta = revinfo.cachedelta
1086 p1 = revinfo.p1
1088 p1 = revinfo.p1
1087 p2 = revinfo.p2
1089 p2 = revinfo.p2
1088 revlog = self.revlog
1090 revlog = self.revlog
1089
1091
1090 deltainfo = None
1092 deltainfo = None
1091 p1r, p2r = revlog.rev(p1), revlog.rev(p2)
1093 p1r, p2r = revlog.rev(p1), revlog.rev(p2)
1092 groups = _candidategroups(
1094 groups = _candidategroups(
1093 self.revlog, revinfo.textlen, p1r, p2r, cachedelta
1095 self.revlog, revinfo.textlen, p1r, p2r, cachedelta
1094 )
1096 )
1095 candidaterevs = next(groups)
1097 candidaterevs = next(groups)
1096 while candidaterevs is not None:
1098 while candidaterevs is not None:
1097 nominateddeltas = []
1099 nominateddeltas = []
1098 if deltainfo is not None:
1100 if deltainfo is not None:
1099 # if we already found a good delta,
1101 # if we already found a good delta,
1100 # challenge it against refined candidates
1102 # challenge it against refined candidates
1101 nominateddeltas.append(deltainfo)
1103 nominateddeltas.append(deltainfo)
1102 for candidaterev in candidaterevs:
1104 for candidaterev in candidaterevs:
1103 if candidaterev in excluded_bases:
1105 if candidaterev in excluded_bases:
1104 continue
1106 continue
1107 if candidaterev >= target_rev:
1108 continue
1105 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
1109 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
1106 if candidatedelta is not None:
1110 if candidatedelta is not None:
1107 if isgooddeltainfo(self.revlog, candidatedelta, revinfo):
1111 if isgooddeltainfo(self.revlog, candidatedelta, revinfo):
1108 nominateddeltas.append(candidatedelta)
1112 nominateddeltas.append(candidatedelta)
1109 if nominateddeltas:
1113 if nominateddeltas:
1110 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
1114 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
1111 if deltainfo is not None:
1115 if deltainfo is not None:
1112 candidaterevs = groups.send(deltainfo.base)
1116 candidaterevs = groups.send(deltainfo.base)
1113 else:
1117 else:
1114 candidaterevs = next(groups)
1118 candidaterevs = next(groups)
1115
1119
1116 if deltainfo is None:
1120 if deltainfo is None:
1117 deltainfo = self._fullsnapshotinfo(fh, revinfo)
1121 deltainfo = self._fullsnapshotinfo(fh, revinfo, target_rev)
1118 return deltainfo
1122 return deltainfo
1119
1123
1120
1124
1121 def delta_compression(default_compression_header, deltainfo):
1125 def delta_compression(default_compression_header, deltainfo):
1122 """return (COMPRESSION_MODE, deltainfo)
1126 """return (COMPRESSION_MODE, deltainfo)
1123
1127
1124 used by revlog v2+ format to dispatch between PLAIN and DEFAULT
1128 used by revlog v2+ format to dispatch between PLAIN and DEFAULT
1125 compression.
1129 compression.
1126 """
1130 """
1127 h, d = deltainfo.data
1131 h, d = deltainfo.data
1128 compression_mode = COMP_MODE_INLINE
1132 compression_mode = COMP_MODE_INLINE
1129 if not h and not d:
1133 if not h and not d:
1130 # not data to store at all... declare them uncompressed
1134 # not data to store at all... declare them uncompressed
1131 compression_mode = COMP_MODE_PLAIN
1135 compression_mode = COMP_MODE_PLAIN
1132 elif not h:
1136 elif not h:
1133 t = d[0:1]
1137 t = d[0:1]
1134 if t == b'\0':
1138 if t == b'\0':
1135 compression_mode = COMP_MODE_PLAIN
1139 compression_mode = COMP_MODE_PLAIN
1136 elif t == default_compression_header:
1140 elif t == default_compression_header:
1137 compression_mode = COMP_MODE_DEFAULT
1141 compression_mode = COMP_MODE_DEFAULT
1138 elif h == b'u':
1142 elif h == b'u':
1139 # we have a more efficient way to declare uncompressed
1143 # we have a more efficient way to declare uncompressed
1140 h = b''
1144 h = b''
1141 compression_mode = COMP_MODE_PLAIN
1145 compression_mode = COMP_MODE_PLAIN
1142 deltainfo = drop_u_compression(deltainfo)
1146 deltainfo = drop_u_compression(deltainfo)
1143 return compression_mode, deltainfo
1147 return compression_mode, deltainfo
General Comments 0
You need to be logged in to leave comments. Login now