##// END OF EJS Templates
deltas: add code to display information about the result of `finddeltainfo`...
marmoute -
r50121:e6b7c6fb default
parent child Browse files
Show More
@@ -1,1146 +1,1233 b''
1 # revlogdeltas.py - Logic around delta computation for revlog
1 # revlogdeltas.py - Logic around delta computation for revlog
2 #
2 #
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 # Copyright 2018 Octobus <contact@octobus.net>
4 # Copyright 2018 Octobus <contact@octobus.net>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8 """Helper class to compute deltas stored inside revlogs"""
8 """Helper class to compute deltas stored inside revlogs"""
9
9
10
10
11 import collections
11 import collections
12 import struct
12 import struct
13
13
14 # import stuff from node for others to import from revlog
14 # import stuff from node for others to import from revlog
15 from ..node import nullrev
15 from ..node import nullrev
16 from ..i18n import _
16 from ..i18n import _
17 from ..pycompat import getattr
17 from ..pycompat import getattr
18
18
19 from .constants import (
19 from .constants import (
20 COMP_MODE_DEFAULT,
20 COMP_MODE_DEFAULT,
21 COMP_MODE_INLINE,
21 COMP_MODE_INLINE,
22 COMP_MODE_PLAIN,
22 COMP_MODE_PLAIN,
23 KIND_CHANGELOG,
24 KIND_FILELOG,
25 KIND_MANIFESTLOG,
23 REVIDX_ISCENSORED,
26 REVIDX_ISCENSORED,
24 REVIDX_RAWTEXT_CHANGING_FLAGS,
27 REVIDX_RAWTEXT_CHANGING_FLAGS,
25 )
28 )
26
29
27 from ..thirdparty import attr
30 from ..thirdparty import attr
28
31
29 from .. import (
32 from .. import (
30 error,
33 error,
31 mdiff,
34 mdiff,
32 util,
35 util,
33 )
36 )
34
37
35 from . import flagutil
38 from . import flagutil
36
39
# maximum <delta-chain-data>/<revision-text-length> ratio
# NOTE(review): presumably chains whose cumulative delta data exceeds this
# multiple of the full-text length are rejected as "not good" — confirm
# against the consumers of this constant (e.g. isgooddeltainfo).
LIMIT_DELTA2TEXT = 2
39
42
40
43
class _testrevlog:
    """Minimal stand-in for a revlog, used by this module's doctests.

    ``data`` is a list of cumulative payload end offsets, one entry per
    revision, so revision ``i`` occupies bytes ``data[i - 1]:data[i]``.
    """

    def __init__(self, data, density=0.5, mingap=0, snapshot=()):
        """data is an list of revision payload boundaries"""
        # cumulative end offsets, one per revision
        self._data = data
        self._srdensitythreshold = density
        self._srmingapsize = mingap
        self._snapshot = set(snapshot)
        self.index = None

    def start(self, rev):
        # nullrev and the very first revision both start at offset zero
        if rev in (nullrev, 0):
            return 0
        return self._data[rev - 1]

    def end(self, rev):
        return 0 if rev == nullrev else self._data[rev]

    def length(self, rev):
        return self.end(rev) - self.start(rev)

    def __len__(self):
        return len(self._data)

    def issnapshot(self, rev):
        # nullrev is always considered a snapshot
        return rev == nullrev or rev in self._snapshot
74
77
75
78
def slicechunk(revlog, revs, targetsize=None):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `revlog._srdensitythreshold`. No gap smaller than
    `revlog._srmingapsize` is skipped.

    If `targetsize` is set, no chunk larger than `targetsize` will be yield.
    For consistency with other slicing choice, this limit won't go lower than
    `revlog._srmingapsize`.

    If individual revisions chunk are larger than this limit, they will still
    be raised individually.

    >>> data = [
    ...  5,  #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ]
    >>> revlog = _testrevlog(data, snapshot=range(16))

    >>> list(slicechunk(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(slicechunk(revlog, [0, 15]))
    [[0], [15]]
    >>> list(slicechunk(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]

    Slicing with a maximum chunk size
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
    [[0], [11], [13], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
    [[0], [11], [13, 15]]

    Slicing involving nullrev
    >>> list(slicechunk(revlog, [-1, 0, 11, 13, 15], targetsize=20))
    [[-1, 0], [11], [13, 15]]
    >>> list(slicechunk(revlog, [-1, 13, 15], targetsize=5))
    [[-1], [13], [15]]
    """
    if targetsize is not None:
        # never slice below the minimum gap size used by density slicing
        targetsize = max(targetsize, revlog._srmingapsize)
    # targetsize should not be specified when evaluating delta candidates:
    # * targetsize is used to ensure we stay within specification when reading,
    densityslicing = getattr(revlog.index, 'slicechunktodensity', None)
    if densityslicing is None:
        # fall back to the pure-Python implementation
        def densityslicing(subrevs, density, mingap):
            return _slicechunktodensity(revlog, subrevs, density, mingap)

    chunks = densityslicing(
        revs, revlog._srdensitythreshold, revlog._srmingapsize
    )
    for chunk in chunks:
        # enforce the maximum read size on each density-selected chunk
        yield from _slicechunktosize(revlog, chunk, targetsize)
148
151
149
152
def _slicechunktosize(revlog, revs, targetsize=None):
    """slice revs to match the target size

    This is intended to be used on chunk that density slicing selected by that
    are still too large compared to the read garantee of revlog. This might
    happens when "minimal gap size" interrupted the slicing or when chain are
    built in a way that create large blocks next to each other.

    >>> data = [
    ...  3,  #0 (3)
    ...  5,  #1 (2)
    ...  6,  #2 (1)
    ...  8,  #3 (2)
    ...  8,  #4 (empty)
    ...  11, #5 (3)
    ...  12, #6 (1)
    ...  13, #7 (1)
    ...  14, #8 (1)
    ... ]

    == All snapshots cases ==
    >>> revlog = _testrevlog(data, snapshot=range(9))

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1, 2], [3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]

    == No Snapshot cases ==
    >>> revlog = _testrevlog(data)

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1], [2, 3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [4, 5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]

    == mixed case ==
    >>> revlog = _testrevlog(data, snapshot=[0, 1, 2])
    >>> list(_slicechunktosize(revlog, list(range(9)), 5))
    [[0, 1], [2], [3, 4, 5], [6, 7, 8]]
    """
    assert targetsize is None or 0 <= targetsize
    startdata = revlog.start(revs[0])
    enddata = revlog.end(revs[-1])
    fullspan = enddata - startdata
    # nothing to do when the whole span already fits within the target size
    if targetsize is None or fullspan <= targetsize:
        yield revs
        return

    startrevidx = 0
    endrevidx = 1
    iterrevs = enumerate(revs)
    next(iterrevs)  # skip first rev.
    # first step: get snapshots out of the way
    for idx, r in iterrevs:
        span = revlog.end(r) - startdata
        snapshot = revlog.issnapshot(r)
        if span <= targetsize and snapshot:
            # snapshot still within budget: extend the current chunk
            endrevidx = idx + 1
        else:
            # flush the current chunk and restart one at this revision
            chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
            if chunk:
                yield chunk
            startrevidx = idx
            startdata = revlog.start(r)
            endrevidx = idx + 1
        if not snapshot:
            break

    # for the others, we use binary slicing to quickly converge toward valid
    # chunks (otherwise, we might end up looking for start/end of many
    # revisions). This logic is not looking for the perfect slicing point, it
    # focuses on quickly converging toward valid chunks.
    nbitem = len(revs)
    while (enddata - startdata) > targetsize:
        endrevidx = nbitem
        if nbitem - startrevidx <= 1:
            break  # protect against individual chunk larger than limit
        localenddata = revlog.end(revs[endrevidx - 1])
        span = localenddata - startdata
        # halve the candidate chunk until it fits within targetsize
        while span > targetsize:
            if endrevidx - startrevidx <= 1:
                break  # protect against individual chunk larger than limit
            endrevidx -= (endrevidx - startrevidx) // 2
            localenddata = revlog.end(revs[endrevidx - 1])
            span = localenddata - startdata
        chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
        if chunk:
            yield chunk
        startrevidx = endrevidx
        startdata = revlog.start(revs[startrevidx])

    chunk = _trimchunk(revlog, revs, startrevidx)
    if chunk:
        yield chunk
303
306
304
307
def _slicechunktodensity(revlog, revs, targetdensity=0.5, mingapsize=0):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
    skipped.

    >>> revlog = _testrevlog([
    ...  5,  #00 (5)
    ...  10, #01 (5)
    ...  12, #02 (2)
    ...  12, #03 (empty)
    ...  27, #04 (15)
    ...  31, #05 (4)
    ...  31, #06 (empty)
    ...  42, #07 (11)
    ...  47, #08 (5)
    ...  47, #09 (empty)
    ...  48, #10 (1)
    ...  51, #11 (3)
    ...  74, #12 (23)
    ...  85, #13 (11)
    ...  86, #14 (1)
    ...  91, #15 (5)
    ... ])

    >>> list(_slicechunktodensity(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(_slicechunktodensity(revlog, [0, 15]))
    [[0], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           mingapsize=20))
    [[1, 2, 3, 5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95))
    [[1, 2], [5], [8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95, mingapsize=12))
    [[1, 2], [5, 8, 10, 11], [14]]
    """
    start = revlog.start
    length = revlog.length

    # a single revision is always dense enough
    if len(revs) <= 1:
        yield revs
        return

    deltachainspan = segmentspan(revlog, revs)

    if deltachainspan < mingapsize:
        yield revs
        return

    readdata = deltachainspan
    chainpayload = sum(length(r) for r in revs)

    if deltachainspan:
        density = chainpayload / float(deltachainspan)
    else:
        density = 1.0

    if density >= targetdensity:
        yield revs
        return

    # Collect the gaps between consecutive non-empty revisions; the list is
    # sorted afterward so the largest gaps can be popped first.
    gaps = []
    prevend = None
    for i, rev in enumerate(revs):
        revstart = start(rev)
        revlen = length(rev)

        # Skip empty revisions to form larger holes
        if revlen == 0:
            continue

        if prevend is not None:
            gapsize = revstart - prevend
            # only consider holes that are large enough
            if gapsize > mingapsize:
                gaps.append((gapsize, i))

        prevend = revstart + revlen
    # sort the gaps to pop them from largest to small
    gaps.sort()

    # Collect the indices of the largest holes until the density is acceptable
    selected = []
    while gaps and density < targetdensity:
        # pop() returns the largest remaining gap since the list is sorted
        # in increasing size
        gapsize, gapidx = gaps.pop()

        selected.append(gapidx)

        # skipping this gap reduces the amount of data actually read
        readdata -= gapsize
        if readdata > 0:
            density = chainpayload / float(readdata)
        else:
            density = 1.0
    selected.sort()

    # Cut the revs at collected indices
    previdx = 0
    for idx in selected:

        chunk = _trimchunk(revlog, revs, previdx, idx)
        if chunk:
            yield chunk

        previdx = idx

    chunk = _trimchunk(revlog, revs, previdx)
    if chunk:
        yield chunk
429
432
430
433
def _trimchunk(revlog, revs, startidx, endidx=None):
    """returns revs[startidx:endidx] without empty trailing revs

    Doctest Setup
    >>> revlog = _testrevlog([
    ...  5,  #0
    ...  10, #1
    ...  12, #2
    ...  12, #3 (empty)
    ...  17, #4
    ...  21, #5
    ...  21, #6 (empty)
    ... ])

    Contiguous cases:
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
    [0, 1, 2, 3, 4, 5]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
    [0, 1, 2, 3, 4]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
    [0, 1, 2]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
    [2]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
    [3, 4, 5]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
    [3, 4]

    Discontiguous cases:
    >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
    [1, 3, 5]
    >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
    [1]
    >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
    [3, 5]
    >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
    [3, 5]
    """
    if endidx is None:
        endidx = len(revs)

    revlen = revlog.length

    # If we have a non-empty delta candidate, there is nothing to trim
    if revs[endidx - 1] < len(revlog):
        # Trim empty revs at the end, except the very first revision of a
        # chain (``endidx > max(1, startidx)`` keeps at least one revision
        # and never walks back past ``startidx``).
        while endidx > max(1, startidx) and revlen(revs[endidx - 1]) == 0:
            endidx -= 1

    return revs[startidx:endidx]
483
486
484
487
def segmentspan(revlog, revs):
    """Get the byte span of a segment of revisions

    revs is a sorted array of revision numbers

    >>> revlog = _testrevlog([
    ...  5,  #0
    ...  10, #1
    ...  12, #2
    ...  12, #3 (empty)
    ...  17, #4
    ... ])

    >>> segmentspan(revlog, [0, 1, 2, 3, 4])
    17
    >>> segmentspan(revlog, [0, 4])
    17
    >>> segmentspan(revlog, [3, 4])
    5
    >>> segmentspan(revlog, [1, 2, 3,])
    7
    >>> segmentspan(revlog, [1, 3])
    7
    """
    if not revs:
        return 0
    # revs is sorted, so the span runs from the start of the first revision
    # to the end of the last one
    first_start = revlog.start(revs[0])
    last_end = revlog.end(revs[-1])
    return last_end - first_start
513
516
514
517
def _textfromdelta(fh, revlog, baserev, delta, p1, p2, flags, expectednode):
    """build full text from a (base, delta) pair and other metadata

    ``fh`` is an open file handle forwarded to ``revlog.revision`` when the
    base text has to be read back.  The resulting text is checked against
    ``expectednode`` (with parents ``p1``/``p2``) unless the flag processors
    disable hash validation.
    """
    # special case deltas which replace entire base; no need to decode
    # base revision. this neatly avoids censored bases, which throw when
    # they're decoded.
    hlen = struct.calcsize(b">lll")
    if delta[:hlen] == mdiff.replacediffheader(
        revlog.rawsize(baserev), len(delta) - hlen
    ):
        fulltext = delta[hlen:]
    else:
        # deltabase is rawtext before changed by flag processors, which is
        # equivalent to non-raw text
        basetext = revlog.revision(baserev, _df=fh)
        fulltext = mdiff.patch(basetext, delta)

    try:
        validatehash = flagutil.processflagsraw(revlog, fulltext, flags)
        if validatehash:
            revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
        # a node flagged censored must actually fail hash validation with
        # CensoredNodeError; reaching this point means it did not
        if flags & REVIDX_ISCENSORED:
            raise error.StorageError(
                _(b'node %s is not censored') % expectednode
            )
    except error.CensoredNodeError:
        # must pass the censored index flag to add censored revisions
        if not flags & REVIDX_ISCENSORED:
            raise
    return fulltext
544
547
545
548
@attr.s(slots=True, frozen=True)
class _deltainfo:
    """Immutable description of one candidate delta (built by deltacomputer)."""

    # on-disk span in bytes from the start of the chain base to the end of
    # this delta once appended (deltalen + offset - start(chainbase))
    distance = attr.ib()
    # stored size of the delta, compression header included
    deltalen = attr.ib()
    # (header, data) pair as returned by revlog.compress()
    data = attr.ib()
    # revision this delta applies to
    base = attr.ib()
    # first revision of the delta chain
    chainbase = attr.ib()
    # length of the delta chain, this new delta included
    chainlen = attr.ib()
    # cumulated compressed size of every delta in the chain
    compresseddeltalen = attr.ib()
    # snapshot depth, or None when the revision is not a snapshot
    snapshotdepth = attr.ib()
556
559
557
560
def drop_u_compression(delta):
    """turn a "u" (no-compression) delta into no-compression without header

    This is useful for revlog format that has better compression method.
    """
    # the payload must carry the explicit "stored uncompressed" marker
    header, payload = delta.data
    assert header == b'u', header
    # rebuild the (frozen) info with the one-byte marker stripped
    return _deltainfo(
        distance=delta.distance,
        deltalen=delta.deltalen - 1,
        data=(b'', payload),
        base=delta.base,
        chainbase=delta.chainbase,
        chainlen=delta.chainlen,
        compresseddeltalen=delta.compresseddeltalen,
        snapshotdepth=delta.snapshotdepth,
    )
574
577
575
578
def isgooddeltainfo(revlog, deltainfo, revinfo):
    """Returns True if the given delta is good. Good means that it is within
    the disk span, disk size, and chain length bounds that we know to be
    performant.

    revlog: revlog the delta would be stored in (its ``_maxdeltachainspan``,
        ``_sparserevlog`` and ``_maxchainlen`` settings drive the checks)
    deltainfo: a ``_deltainfo`` candidate, or None (never good)
    revinfo: info about the revision being stored; only ``textlen`` is used
    """
    if deltainfo is None:
        return False

    # - 'deltainfo.distance' is the distance from the base revision --
    #   bounding it limits the amount of I/O we need to do.
    # - 'deltainfo.compresseddeltalen' is the sum of the total size of
    #   deltas we need to apply -- bounding it limits the amount of CPU
    #   we consume.

    textlen = revinfo.textlen
    defaultmax = textlen * 4
    maxdist = revlog._maxdeltachainspan
    if not maxdist:
        maxdist = deltainfo.distance  # ensure the conditional pass
    maxdist = max(maxdist, defaultmax)

    # Bad delta from read span:
    #
    #   If the span of data read is larger than the maximum allowed.
    #
    #   In the sparse-revlog case, we rely on the associated "sparse reading"
    #   to avoid issue related to the span of data. In theory, it would be
    #   possible to build pathological revlog where delta pattern would lead
    #   to too many reads. However, they do not happen in practice at all. So
    #   we skip the span check entirely.
    if not revlog._sparserevlog and maxdist < deltainfo.distance:
        return False

    # Bad delta from new delta size:
    #
    #   If the delta size is larger than the target text, storing the
    #   delta will be inefficient.
    if textlen < deltainfo.deltalen:
        return False

    # Bad delta from cumulated payload size:
    #
    #   If the sum of delta get larger than K * target text length.
    if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
        return False

    # Bad delta from chain length:
    #
    #   If the number of delta in the chain gets too high.
    if revlog._maxchainlen and revlog._maxchainlen < deltainfo.chainlen:
        return False

    # bad delta from intermediate snapshot size limit
    #
    #   If an intermediate snapshot size is higher than the limit.  The
    #   limit exist to prevent endless chain of intermediate delta to be
    #   created.
    if (
        deltainfo.snapshotdepth is not None
        and (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen
    ):
        return False

    # bad delta if new intermediate snapshot is larger than the previous
    # snapshot
    if (
        deltainfo.snapshotdepth
        and revlog.length(deltainfo.base) < deltainfo.deltalen
    ):
        return False

    return True
647
650
648
651
# If a revision's full text is that much bigger than a base candidate full
# text's, it is very unlikely that it will produce a valid delta. We no longer
# consider these candidates. (Used in _candidategroups to filter out bases
# whose rawsize is below ``textlen // LIMIT_BASE2TEXT``.)
LIMIT_BASE2TEXT = 500
653
656
654
657
def _candidategroups(revlog, textlen, p1, p2, cachedelta):
    """Provides group of revision to be tested as delta base

    This top level function focus on emitting groups with unique and worthwhile
    content. See _rawgroups for details about the group order.

    This is a generator used through its ``send()`` protocol: after each
    yielded group of candidate revisions, the caller sends back the revision
    picked as a good base (or None), which steers the refinement done by
    ``_refinedgroups``. A final ``None`` is yielded when no candidate remains.
    """
    # should we try to build a delta?
    if not (len(revlog) and revlog._storedeltachains):
        yield None
        return

    deltalength = revlog.length
    deltaparent = revlog.deltaparent
    sparse = revlog._sparserevlog
    good = None

    deltas_limit = textlen * LIMIT_DELTA2TEXT

    tested = {nullrev}
    candidates = _refinedgroups(revlog, p1, p2, cachedelta)
    while True:
        temptative = candidates.send(good)
        if temptative is None:
            break
        group = []
        for rev in temptative:
            # skip over empty delta (no need to include them in a chain)
            while revlog._generaldelta and not (
                rev == nullrev or rev in tested or deltalength(rev)
            ):
                tested.add(rev)
                rev = deltaparent(rev)
            # no need to try a delta against nullrev, this will be done as a
            # last resort.
            if rev == nullrev:
                continue
            # filter out revision we tested already
            if rev in tested:
                continue
            tested.add(rev)
            # filter out delta base that will never produce good delta
            if deltas_limit < revlog.length(rev):
                continue
            # a base whose full text is vastly smaller than the new text is
            # very unlikely to yield a valid delta (see LIMIT_BASE2TEXT)
            if sparse and revlog.rawsize(rev) < (textlen // LIMIT_BASE2TEXT):
                continue
            # no delta for rawtext-changing revs (see "candelta" for why)
            if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
                continue
            # If we reach here, we are about to build and test a delta.
            # The delta building process will compute the chaininfo in all
            # case, since that computation is cached, it is fine to access it
            # here too.
            chainlen, chainsize = revlog._chaininfo(rev)
            # if chain will be too long, skip base
            if revlog._maxchainlen and chainlen >= revlog._maxchainlen:
                continue
            # if chain already have too much data, skip base
            if deltas_limit < chainsize:
                continue
            if sparse and revlog.upperboundcomp is not None:
                maxcomp = revlog.upperboundcomp
                basenotsnap = (p1, p2, nullrev)
                if rev not in basenotsnap and revlog.issnapshot(rev):
                    snapshotdepth = revlog.snapshotdepth(rev)
                    # If text is significantly larger than the base, we can
                    # expect the resulting delta to be proportional to the size
                    # difference
                    revsize = revlog.rawsize(rev)
                    rawsizedistance = max(textlen - revsize, 0)
                    # use an estimate of the compression upper bound.
                    lowestrealisticdeltalen = rawsizedistance // maxcomp

                    # check the absolute constraint on the delta size
                    snapshotlimit = textlen >> snapshotdepth
                    if snapshotlimit < lowestrealisticdeltalen:
                        # delta lower bound is larger than accepted upper bound
                        continue

                    # check the relative constraint on the delta size
                    revlength = revlog.length(rev)
                    if revlength < lowestrealisticdeltalen:
                        # delta probable lower bound is larger than target base
                        continue

            group.append(rev)
        if group:
            # XXX: in the sparse revlog case, group can become large,
            # impacting performances. Some bounding or slicing mechanism
            # would help to reduce this impact.
            good = yield tuple(group)
    yield None
746
749
747
750
def _findsnapshots(revlog, cache, start_rev):
    """find snapshot from start_rev to tip

    Populates ``cache``, a mapping from delta-parent revision to the list of
    snapshot revisions based on it.
    """
    index = revlog.index
    if util.safehasattr(index, b'findsnapshots'):
        # fast path: the index object knows how to gather snapshots itself
        index.findsnapshots(cache, start_rev)
        return
    is_snapshot = revlog.issnapshot
    delta_parent = revlog.deltaparent
    for rev in revlog.revs(start_rev):
        if is_snapshot(rev):
            cache[delta_parent(rev)].append(rev)
758
761
759
762
def _refinedgroups(revlog, p1, p2, cachedelta):
    """Generator of candidate-base groups, refined from caller feedback.

    Driven through ``send()`` by ``_candidategroups``: each yielded group is
    answered with the revision found to be a good base (or None). When a good
    snapshot base is found on a sparse revlog, nearby snapshots (parents and
    children of the winner) are proposed to refine the choice.
    """
    good = None
    # First we try to reuse the delta contained in the bundle.
    # (or from the source revlog)
    #
    # This logic only applies to general delta repositories and can be disabled
    # through configuration. Disabling reuse source delta is useful when
    # we want to make sure we recomputed "optimal" deltas.
    if cachedelta and revlog._generaldelta and revlog._lazydeltabase:
        # Assume what we received from the server is a good choice
        # build delta will reuse the cache
        good = yield (cachedelta[0],)
        if good is not None:
            yield None
            return
    snapshots = collections.defaultdict(list)
    for candidates in _rawgroups(revlog, p1, p2, cachedelta, snapshots):
        good = yield candidates
        if good is not None:
            break

    # If sparse revlog is enabled, we can try to refine the available deltas
    if not revlog._sparserevlog:
        yield None
        return

    # if we have a refinable value, try to refine it
    if good is not None and good not in (p1, p2) and revlog.issnapshot(good):
        # refine snapshot down: walk the winner's delta-parent chain
        previous = None
        while previous != good:
            previous = good
            base = revlog.deltaparent(good)
            if base == nullrev:
                break
            good = yield (base,)
        # refine snapshot up: try snapshots built on top of the winner
        if not snapshots:
            _findsnapshots(revlog, snapshots, good + 1)
        previous = None
        while good != previous:
            previous = good
            children = tuple(sorted(c for c in snapshots[good]))
            good = yield children

    # we have found nothing
    yield None
807
810
808
811
def _rawgroups(revlog, p1, p2, cachedelta, snapshots=None):
    """Provides group of revision to be tested as delta base

    This lower level function focuses on emitting theoretically interesting
    deltas without looking at any practical details.

    The group order aims at providing fast or small candidates first.
    """
    gdelta = revlog._generaldelta
    # gate sparse behind general-delta because of issue6056
    sparse = gdelta and revlog._sparserevlog
    curr = len(revlog)
    prev = curr - 1
    deltachain = lambda rev: revlog._deltachain(rev)[0]

    if gdelta:
        # exclude already lazy tested base if any
        parents = [p for p in (p1, p2) if p != nullrev]

        if not revlog._deltabothparents and len(parents) == 2:
            parents.sort()
            # To minimize the chance of having to build a fulltext,
            # pick first whichever parent is closest to us (max rev)
            yield (parents[1],)
            # then the other one (min rev) if the first did not fit
            yield (parents[0],)
        elif len(parents) > 0:
            # Test all parents (1 or 2), and keep the best candidate
            yield parents

    # NOTE: ``parents`` is only bound when ``gdelta`` is true, but ``sparse``
    # implies ``gdelta`` so the short-circuit below is safe.
    if sparse and parents:
        if snapshots is None:
            # map: base-rev: snapshot-rev
            snapshots = collections.defaultdict(list)
        # See if we can use an existing snapshot in the parent chains to use as
        # a base for a new intermediate-snapshot
        #
        # search for snapshot in parents delta chain
        # map: snapshot-level: snapshot-rev
        parents_snaps = collections.defaultdict(set)
        candidate_chains = [deltachain(p) for p in parents]
        for chain in candidate_chains:
            for idx, s in enumerate(chain):
                if not revlog.issnapshot(s):
                    break
                parents_snaps[idx].add(s)
        snapfloor = min(parents_snaps[0]) + 1
        _findsnapshots(revlog, snapshots, snapfloor)
        # search for the highest "unrelated" revision
        #
        # Adding snapshots used by "unrelated" revision increase the odds we
        # reuse an independent, yet better snapshot chain.
        #
        # XXX instead of building a set of revisions, we could lazily enumerate
        # over the chains. That would be more efficient, however we stick to
        # simple code for now.
        all_revs = set()
        for chain in candidate_chains:
            all_revs.update(chain)
        other = None
        for r in revlog.revs(prev, snapfloor):
            if r not in all_revs:
                other = r
                break
        if other is not None:
            # To avoid unfair competition, we won't use unrelated intermediate
            # snapshot that are deeper than the ones from the parent delta
            # chain.
            max_depth = max(parents_snaps.keys())
            chain = deltachain(other)
            for idx, s in enumerate(chain):
                if s < snapfloor:
                    continue
                if max_depth < idx:
                    break
                if not revlog.issnapshot(s):
                    break
                parents_snaps[idx].add(s)
        # Test them as possible intermediate snapshot base
        # We test them from highest to lowest level. High level one are more
        # likely to result in small delta
        floor = None
        for idx, snaps in sorted(parents_snaps.items(), reverse=True):
            siblings = set()
            for s in snaps:
                siblings.update(snapshots[s])
            # Before considering making a new intermediate snapshot, we check
            # if an existing snapshot, children of base we consider, would be
            # suitable.
            #
            # It gives a chance to reuse a delta chain "unrelated" to the
            # current revision instead of starting our own. Without such
            # re-use, topological branches would keep reopening new chains.
            # Creating more and more snapshot as the repository grows.

            if floor is not None:
                # We only do this for siblings created after the one in our
                # parent's delta chain. Those created before have fewer chances
                # to be valid base since our ancestors had to create a new
                # snapshot.
                siblings = [r for r in siblings if floor < r]
            yield tuple(sorted(siblings))
            # then test the base from our parent's delta chain.
            yield tuple(sorted(snaps))
            floor = min(snaps)
        # No suitable base found in the parent chain, search if any full
        # snapshots emitted since parent's base would be a suitable base for an
        # intermediate snapshot.
        #
        # It gives a chance to reuse a delta chain unrelated to the current
        # revisions instead of starting our own. Without such re-use,
        # topological branches would keep reopening new full chains. Creating
        # more and more snapshot as the repository grows.
        yield tuple(snapshots[nullrev])

    if not sparse:
        # other approach failed try against prev to hopefully save us a
        # fulltext.
        yield (prev,)
928
931
929
932
930 class deltacomputer:
933 class deltacomputer:
    def __init__(self, revlog, write_debug=None):
        # revlog the computer builds deltas for
        self.revlog = revlog
        # optional callable receiving debug output about delta search;
        # None disables it. NOTE(review): its call sites are outside this
        # block — presumably fed bytes messages; confirm against callers.
        self._write_debug = write_debug
933
937
934 def buildtext(self, revinfo, fh):
938 def buildtext(self, revinfo, fh):
935 """Builds a fulltext version of a revision
939 """Builds a fulltext version of a revision
936
940
937 revinfo: revisioninfo instance that contains all needed info
941 revinfo: revisioninfo instance that contains all needed info
938 fh: file handle to either the .i or the .d revlog file,
942 fh: file handle to either the .i or the .d revlog file,
939 depending on whether it is inlined or not
943 depending on whether it is inlined or not
940 """
944 """
941 btext = revinfo.btext
945 btext = revinfo.btext
942 if btext[0] is not None:
946 if btext[0] is not None:
943 return btext[0]
947 return btext[0]
944
948
945 revlog = self.revlog
949 revlog = self.revlog
946 cachedelta = revinfo.cachedelta
950 cachedelta = revinfo.cachedelta
947 baserev = cachedelta[0]
951 baserev = cachedelta[0]
948 delta = cachedelta[1]
952 delta = cachedelta[1]
949
953
950 fulltext = btext[0] = _textfromdelta(
954 fulltext = btext[0] = _textfromdelta(
951 fh,
955 fh,
952 revlog,
956 revlog,
953 baserev,
957 baserev,
954 delta,
958 delta,
955 revinfo.p1,
959 revinfo.p1,
956 revinfo.p2,
960 revinfo.p2,
957 revinfo.flags,
961 revinfo.flags,
958 revinfo.node,
962 revinfo.node,
959 )
963 )
960 return fulltext
964 return fulltext
961
965
962 def _builddeltadiff(self, base, revinfo, fh):
966 def _builddeltadiff(self, base, revinfo, fh):
963 revlog = self.revlog
967 revlog = self.revlog
964 t = self.buildtext(revinfo, fh)
968 t = self.buildtext(revinfo, fh)
965 if revlog.iscensored(base):
969 if revlog.iscensored(base):
966 # deltas based on a censored revision must replace the
970 # deltas based on a censored revision must replace the
967 # full content in one patch, so delta works everywhere
971 # full content in one patch, so delta works everywhere
968 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
972 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
969 delta = header + t
973 delta = header + t
970 else:
974 else:
971 ptext = revlog.rawdata(base, _df=fh)
975 ptext = revlog.rawdata(base, _df=fh)
972 delta = mdiff.textdiff(ptext, t)
976 delta = mdiff.textdiff(ptext, t)
973
977
974 return delta
978 return delta
975
979
    def _builddeltainfo(self, revinfo, base, fh):
        """Build a ``_deltainfo`` storing ``revinfo`` as a delta against ``base``.

        Reuses the cached delta from ``revinfo.cachedelta`` when it still
        applies; otherwise computes a fresh diff. Returns None when the
        compression upper-bound estimate proves the delta cannot satisfy the
        intermediate-snapshot size limits.
        """
        # can we use the cached delta?
        revlog = self.revlog
        chainbase = revlog.chainbase(base)
        if revlog._generaldelta:
            deltabase = base
        else:
            deltabase = chainbase
        snapshotdepth = None
        if revlog._sparserevlog and deltabase == nullrev:
            snapshotdepth = 0
        elif revlog._sparserevlog and revlog.issnapshot(deltabase):
            # A delta chain should always be one full snapshot,
            # zero or more semi-snapshots, and zero or more deltas
            p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
            if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
                snapshotdepth = len(revlog._deltachain(deltabase)[0])
        delta = None
        if revinfo.cachedelta:
            cachebase, cachediff = revinfo.cachedelta
            # check if the diff still apply: skip over empty intermediate
            # deltas between the cached base and our target base
            currentbase = cachebase
            while (
                currentbase != nullrev
                and currentbase != base
                and self.revlog.length(currentbase) == 0
            ):
                currentbase = self.revlog.deltaparent(currentbase)
            if self.revlog._lazydelta and currentbase == base:
                delta = revinfo.cachedelta[1]
        if delta is None:
            delta = self._builddeltadiff(base, revinfo, fh)
        # snapshotdepth needs to be neither None nor 0 level snapshot
        if revlog.upperboundcomp is not None and snapshotdepth:
            lowestrealisticdeltalen = len(delta) // revlog.upperboundcomp
            snapshotlimit = revinfo.textlen >> snapshotdepth
            # absolute constraint: the delta cannot fit the snapshot budget
            if snapshotlimit < lowestrealisticdeltalen:
                return None
            # relative constraint: the delta would exceed the base's own size
            if revlog.length(base) < lowestrealisticdeltalen:
                return None
        header, data = revlog.compress(delta)
        deltalen = len(header) + len(data)
        offset = revlog.end(len(revlog) - 1)
        # total on-disk span read when applying the whole chain
        dist = deltalen + offset - revlog.start(chainbase)
        chainlen, compresseddeltalen = revlog._chaininfo(base)
        chainlen += 1
        compresseddeltalen += deltalen

        return _deltainfo(
            dist,
            deltalen,
            (header, data),
            deltabase,
            chainbase,
            chainlen,
            compresseddeltalen,
            snapshotdepth,
        )
1034
1038
1035 def _fullsnapshotinfo(self, fh, revinfo, curr):
1039 def _fullsnapshotinfo(self, fh, revinfo, curr):
1036 rawtext = self.buildtext(revinfo, fh)
1040 rawtext = self.buildtext(revinfo, fh)
1037 data = self.revlog.compress(rawtext)
1041 data = self.revlog.compress(rawtext)
1038 compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
1042 compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
1039 deltabase = chainbase = curr
1043 deltabase = chainbase = curr
1040 snapshotdepth = 0
1044 snapshotdepth = 0
1041 chainlen = 1
1045 chainlen = 1
1042
1046
1043 return _deltainfo(
1047 return _deltainfo(
1044 dist,
1048 dist,
1045 deltalen,
1049 deltalen,
1046 data,
1050 data,
1047 deltabase,
1051 deltabase,
1048 chainbase,
1052 chainbase,
1049 chainlen,
1053 chainlen,
1050 compresseddeltalen,
1054 compresseddeltalen,
1051 snapshotdepth,
1055 snapshotdepth,
1052 )
1056 )
1053
1057
1054 def finddeltainfo(self, revinfo, fh, excluded_bases=None, target_rev=None):
1058 def finddeltainfo(self, revinfo, fh, excluded_bases=None, target_rev=None):
1055 """Find an acceptable delta against a candidate revision
1059 """Find an acceptable delta against a candidate revision
1056
1060
1057 revinfo: information about the revision (instance of _revisioninfo)
1061 revinfo: information about the revision (instance of _revisioninfo)
1058 fh: file handle to either the .i or the .d revlog file,
1062 fh: file handle to either the .i or the .d revlog file,
1059 depending on whether it is inlined or not
1063 depending on whether it is inlined or not
1060
1064
1061 Returns the first acceptable candidate revision, as ordered by
1065 Returns the first acceptable candidate revision, as ordered by
1062 _candidategroups
1066 _candidategroups
1063
1067
1064 If no suitable deltabase is found, we return delta info for a full
1068 If no suitable deltabase is found, we return delta info for a full
1065 snapshot.
1069 snapshot.
1066
1070
1067 `excluded_bases` is an optional set of revision that cannot be used as
1071 `excluded_bases` is an optional set of revision that cannot be used as
1068 a delta base. Use this to recompute delta suitable in censor or strip
1072 a delta base. Use this to recompute delta suitable in censor or strip
1069 context.
1073 context.
1070 """
1074 """
1071 if target_rev is None:
1075 if target_rev is None:
1072 target_rev = len(self.revlog)
1076 target_rev = len(self.revlog)
1073
1077
1074 if not revinfo.textlen:
1078 if not revinfo.textlen:
1075 return self._fullsnapshotinfo(fh, revinfo, target_rev)
1079 return self._fullsnapshotinfo(fh, revinfo, target_rev)
1076
1080
1077 if excluded_bases is None:
1081 if excluded_bases is None:
1078 excluded_bases = set()
1082 excluded_bases = set()
1079
1083
1080 # no delta for flag processor revision (see "candelta" for why)
1084 # no delta for flag processor revision (see "candelta" for why)
1081 # not calling candelta since only one revision needs test, also to
1085 # not calling candelta since only one revision needs test, also to
1082 # avoid overhead fetching flags again.
1086 # avoid overhead fetching flags again.
1083 if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
1087 if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
1084 return self._fullsnapshotinfo(fh, revinfo, target_rev)
1088 return self._fullsnapshotinfo(fh, revinfo, target_rev)
1085
1089
1090 if self._write_debug is not None:
1091 start = util.timer()
1092
1093 # count the number of different delta we tried (for debug purpose)
1094 dbg_try_count = 0
1095 # count the number of "search round" we did. (for debug purpose)
1096 dbg_try_rounds = 0
1097 dbg_type = b'unknown'
1098
1086 cachedelta = revinfo.cachedelta
1099 cachedelta = revinfo.cachedelta
1087 p1 = revinfo.p1
1100 p1 = revinfo.p1
1088 p2 = revinfo.p2
1101 p2 = revinfo.p2
1089 revlog = self.revlog
1102 revlog = self.revlog
1090
1103
1091 deltainfo = None
1104 deltainfo = None
1092 p1r, p2r = revlog.rev(p1), revlog.rev(p2)
1105 p1r, p2r = revlog.rev(p1), revlog.rev(p2)
1106
1107 if self._write_debug is not None:
1108 if p1r != nullrev:
1109 p1_chain_len = revlog._chaininfo(p1r)[0]
1110 else:
1111 p1_chain_len = -1
1112 if p2r != nullrev:
1113 p2_chain_len = revlog._chaininfo(p2r)[0]
1114 else:
1115 p2_chain_len = -1
1116
1093 groups = _candidategroups(
1117 groups = _candidategroups(
1094 self.revlog, revinfo.textlen, p1r, p2r, cachedelta
1118 self.revlog, revinfo.textlen, p1r, p2r, cachedelta
1095 )
1119 )
1096 candidaterevs = next(groups)
1120 candidaterevs = next(groups)
1097 while candidaterevs is not None:
1121 while candidaterevs is not None:
1122 dbg_try_rounds += 1
1098 nominateddeltas = []
1123 nominateddeltas = []
1099 if deltainfo is not None:
1124 if deltainfo is not None:
1100 # if we already found a good delta,
1125 # if we already found a good delta,
1101 # challenge it against refined candidates
1126 # challenge it against refined candidates
1102 nominateddeltas.append(deltainfo)
1127 nominateddeltas.append(deltainfo)
1103 for candidaterev in candidaterevs:
1128 for candidaterev in candidaterevs:
1104 if candidaterev in excluded_bases:
1129 if candidaterev in excluded_bases:
1105 continue
1130 continue
1106 if candidaterev >= target_rev:
1131 if candidaterev >= target_rev:
1107 continue
1132 continue
1133 dbg_try_count += 1
1108 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
1134 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
1109 if candidatedelta is not None:
1135 if candidatedelta is not None:
1110 if isgooddeltainfo(self.revlog, candidatedelta, revinfo):
1136 if isgooddeltainfo(self.revlog, candidatedelta, revinfo):
1111 nominateddeltas.append(candidatedelta)
1137 nominateddeltas.append(candidatedelta)
1112 if nominateddeltas:
1138 if nominateddeltas:
1113 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
1139 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
1114 if deltainfo is not None:
1140 if deltainfo is not None:
1115 candidaterevs = groups.send(deltainfo.base)
1141 candidaterevs = groups.send(deltainfo.base)
1116 else:
1142 else:
1117 candidaterevs = next(groups)
1143 candidaterevs = next(groups)
1118
1144
1119 if deltainfo is None:
1145 if deltainfo is None:
1146 dbg_type = b"full"
1120 deltainfo = self._fullsnapshotinfo(fh, revinfo, target_rev)
1147 deltainfo = self._fullsnapshotinfo(fh, revinfo, target_rev)
1148 elif deltainfo.snapshotdepth:
1149 dbg_type = b"snapshot"
1150 else:
1151 dbg_type = b"delta"
1152
1153 if self._write_debug is not None:
1154 end = util.timer()
1155 dbg = {
1156 'duration': end - start,
1157 'revision': target_rev,
1158 'search_round_count': dbg_try_rounds,
1159 'delta_try_count': dbg_try_count,
1160 'type': dbg_type,
1161 'p1-chain-len': p1_chain_len,
1162 'p2-chain-len': p2_chain_len,
1163 }
1164 if deltainfo.snapshotdepth is not None:
1165 dbg['snapshot-depth'] = deltainfo.snapshotdepth
1166 else:
1167 dbg['snapshot-depth'] = 0
1168 target_revlog = b"UNKNOWN"
1169 target_type = self.revlog.target[0]
1170 target_key = self.revlog.target[1]
1171 if target_type == KIND_CHANGELOG:
1172 target_revlog = b'CHANGELOG:'
1173 elif target_type == KIND_MANIFESTLOG:
1174 target_revlog = b'MANIFESTLOG:'
1175 if target_key:
1176 target_revlog += b'%s:' % target_key
1177 elif target_type == KIND_FILELOG:
1178 target_revlog = b'FILELOG:'
1179 if target_key:
1180 target_revlog += b'%s:' % target_key
1181 dbg['target-revlog'] = target_revlog
1182
1183 msg = (
1184 b"DBG-DELTAS:"
1185 b" %-12s"
1186 b" rev=%d:"
1187 b" search-rounds=%d"
1188 b" try-count=%d"
1189 b" - delta-type=%-6s"
1190 b" snap-depth=%d"
1191 b" - p1-chain-length=%d"
1192 b" p2-chain-length=%d"
1193 b" - duration=%f"
1194 b"\n"
1195 )
1196 msg %= (
1197 dbg["target-revlog"],
1198 dbg["revision"],
1199 dbg["search_round_count"],
1200 dbg["delta_try_count"],
1201 dbg["type"],
1202 dbg["snapshot-depth"],
1203 dbg["p1-chain-len"],
1204 dbg["p2-chain-len"],
1205 dbg["duration"],
1206 )
1207 self._write_debug(msg)
1121 return deltainfo
1208 return deltainfo
1122
1209
1123
1210
def delta_compression(default_compression_header, deltainfo):
    """return (COMPRESSION_MODE, deltainfo)

    used by revlog v2+ format to dispatch between PLAIN and DEFAULT
    compression.

    default_compression_header: first byte emitted by the revlog's default
        compression engine, used to recognize data compressed with it
    deltainfo: a _deltainfo whose `data` is a (header, payload) pair

    The returned deltainfo may differ from the input when the b'u'
    (uncompressed) marker can be dropped in favour of COMP_MODE_PLAIN.
    """
    h, d = deltainfo.data
    compression_mode = COMP_MODE_INLINE
    if not h and not d:
        # no data to store at all... declare them uncompressed
        compression_mode = COMP_MODE_PLAIN
    elif not h:
        # no explicit header: sniff the first byte of the payload
        t = d[0:1]
        if t == b'\0':
            compression_mode = COMP_MODE_PLAIN
        elif t == default_compression_header:
            compression_mode = COMP_MODE_DEFAULT
    elif h == b'u':
        # we have a more efficient way to declare uncompressed
        h = b''
        compression_mode = COMP_MODE_PLAIN
        deltainfo = drop_u_compression(deltainfo)
    return compression_mode, deltainfo
General Comments 0
You need to be logged in to leave comments. Login now