##// END OF EJS Templates
delta-find: adjust the moment when we mark something as "tested"...
marmoute -
r50511:4956942c default
parent child Browse files
Show More
@@ -1,1398 +1,1407 b''
1 # revlogdeltas.py - Logic around delta computation for revlog
1 # revlogdeltas.py - Logic around delta computation for revlog
2 #
2 #
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 # Copyright 2018 Octobus <contact@octobus.net>
4 # Copyright 2018 Octobus <contact@octobus.net>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8 """Helper class to compute deltas stored inside revlogs"""
8 """Helper class to compute deltas stored inside revlogs"""
9
9
10
10
11 import collections
11 import collections
12 import struct
12 import struct
13
13
14 # import stuff from node for others to import from revlog
14 # import stuff from node for others to import from revlog
15 from ..node import nullrev
15 from ..node import nullrev
16 from ..i18n import _
16 from ..i18n import _
17 from ..pycompat import getattr
17 from ..pycompat import getattr
18
18
19 from .constants import (
19 from .constants import (
20 COMP_MODE_DEFAULT,
20 COMP_MODE_DEFAULT,
21 COMP_MODE_INLINE,
21 COMP_MODE_INLINE,
22 COMP_MODE_PLAIN,
22 COMP_MODE_PLAIN,
23 KIND_CHANGELOG,
23 KIND_CHANGELOG,
24 KIND_FILELOG,
24 KIND_FILELOG,
25 KIND_MANIFESTLOG,
25 KIND_MANIFESTLOG,
26 REVIDX_ISCENSORED,
26 REVIDX_ISCENSORED,
27 REVIDX_RAWTEXT_CHANGING_FLAGS,
27 REVIDX_RAWTEXT_CHANGING_FLAGS,
28 )
28 )
29
29
30 from ..thirdparty import attr
30 from ..thirdparty import attr
31
31
32 from .. import (
32 from .. import (
33 error,
33 error,
34 mdiff,
34 mdiff,
35 util,
35 util,
36 )
36 )
37
37
38 from . import flagutil
38 from . import flagutil
39
39
40 # maximum <delta-chain-data>/<revision-text-length> ratio
40 # maximum <delta-chain-data>/<revision-text-length> ratio
41 LIMIT_DELTA2TEXT = 2
41 LIMIT_DELTA2TEXT = 2
42
42
43
43
class _testrevlog:
    """A minimal stand-in for a real revlog, for use in doctests."""

    def __init__(self, data, density=0.5, mingap=0, snapshot=()):
        """``data`` is a list of cumulative revision payload boundaries"""
        self._data = data
        self._srdensitythreshold = density
        self._srmingapsize = mingap
        self._snapshot = set(snapshot)
        self.index = None

    def start(self, rev):
        # nullrev and the very first revision both start at offset zero
        if rev == nullrev or rev == 0:
            return 0
        return self._data[rev - 1]

    def end(self, rev):
        return 0 if rev == nullrev else self._data[rev]

    def length(self, rev):
        return self.end(rev) - self.start(rev)

    def __len__(self):
        return len(self._data)

    def issnapshot(self, rev):
        # nullrev always counts as a snapshot
        return rev == nullrev or rev in self._snapshot
77
77
78
78
def slicechunk(revlog, revs, targetsize=None):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `revlog._srdensitythreshold`. No gap smaller than
    `revlog._srmingapsize` is skipped.

    If `targetsize` is set, no chunk larger than `targetsize` will be yield.
    For consistency with other slicing choice, this limit won't go lower than
    `revlog._srmingapsize`.

    If individual revisions chunk are larger than this limit, they will still
    be raised individually.

    >>> data = [
    ...     5,  #00 (5)
    ...     10, #01 (5)
    ...     12, #02 (2)
    ...     12, #03 (empty)
    ...     27, #04 (15)
    ...     31, #05 (4)
    ...     31, #06 (empty)
    ...     42, #07 (11)
    ...     47, #08 (5)
    ...     47, #09 (empty)
    ...     48, #10 (1)
    ...     51, #11 (3)
    ...     74, #12 (23)
    ...     85, #13 (11)
    ...     86, #14 (1)
    ...     91, #15 (5)
    ... ]
    >>> revlog = _testrevlog(data, snapshot=range(16))

    >>> list(slicechunk(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(slicechunk(revlog, [0, 15]))
    [[0], [15]]
    >>> list(slicechunk(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]

    Slicing with a maximum chunk size
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
    [[0], [11], [13], [15]]
    >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
    [[0], [11], [13, 15]]

    Slicing involving nullrev
    >>> list(slicechunk(revlog, [-1, 0, 11, 13, 15], targetsize=20))
    [[-1, 0], [11], [13, 15]]
    >>> list(slicechunk(revlog, [-1, 13, 15], targetsize=5))
    [[-1], [13], [15]]
    """
    if targetsize is not None:
        targetsize = max(targetsize, revlog._srmingapsize)
    # targetsize should not be specified when evaluating delta candidates:
    # * targetsize is used to ensure we stay within specification when reading,
    densityslicing = getattr(revlog.index, 'slicechunktodensity', None)
    if densityslicing is None:
        # no native implementation available; fall back to the pure-python one

        def densityslicing(x, y, z):
            return _slicechunktodensity(revlog, x, y, z)

    density_chunks = densityslicing(
        revs, revlog._srdensitythreshold, revlog._srmingapsize
    )
    for chunk in density_chunks:
        # each density-selected chunk may still exceed the size target;
        # split it further when needed
        yield from _slicechunktosize(revlog, chunk, targetsize)
151
151
152
152
def _slicechunktosize(revlog, revs, targetsize=None):
    """slice revs to match the target size

    This is intended to be used on chunks that density slicing selected, but
    that are still too large compared to the read guarantee of the revlog.
    This might happen when the "minimal gap size" interrupted the slicing or
    when chains are built in a way that creates large blocks next to each
    other.

    >>> data = [
    ...     3,  #0 (3)
    ...     5,  #1 (2)
    ...     6,  #2 (1)
    ...     8,  #3 (2)
    ...     8,  #4 (empty)
    ...     11, #5 (3)
    ...     12, #6 (1)
    ...     13, #7 (1)
    ...     14, #8 (1)
    ... ]

    == All snapshots cases ==
    >>> revlog = _testrevlog(data, snapshot=range(9))

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1, 2], [3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]

    == No Snapshot cases ==
    >>> revlog = _testrevlog(data)

    Cases where chunk is already small enough
    >>> list(_slicechunktosize(revlog, [0], 3))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], 3))
    [[6, 7]]
    >>> list(_slicechunktosize(revlog, [0], None))
    [[0]]
    >>> list(_slicechunktosize(revlog, [6, 7], None))
    [[6, 7]]

    cases where we need actual slicing
    >>> list(_slicechunktosize(revlog, [0, 1], 3))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 3))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
    [[1], [2, 3]]
    >>> list(_slicechunktosize(revlog, [3, 5], 3))
    [[3], [5]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
    [[3], [4, 5]]
    >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
    [[5], [6, 7, 8]]
    >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
    [[0], [1, 2], [3], [5], [6, 7, 8]]

    Case with too large individual chunk (must return valid chunk)
    >>> list(_slicechunktosize(revlog, [0, 1], 2))
    [[0], [1]]
    >>> list(_slicechunktosize(revlog, [1, 3], 1))
    [[1], [3]]
    >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
    [[3], [5]]

    == mixed case ==
    >>> revlog = _testrevlog(data, snapshot=[0, 1, 2])
    >>> list(_slicechunktosize(revlog, list(range(9)), 5))
    [[0, 1], [2], [3, 4, 5], [6, 7, 8]]
    """
    assert targetsize is None or 0 <= targetsize
    startdata = revlog.start(revs[0])
    enddata = revlog.end(revs[-1])
    fullspan = enddata - startdata
    if targetsize is None or fullspan <= targetsize:
        # no limit, or the whole span already fits: a single chunk suffices
        yield revs
        return

    startrevidx = 0
    endrevidx = 1
    iterrevs = enumerate(revs)
    next(iterrevs)  # skip first rev.
    # first step: get snapshots out of the way
    for idx, r in iterrevs:
        span = revlog.end(r) - startdata
        snapshot = revlog.issnapshot(r)
        if span <= targetsize and snapshot:
            # still within budget and still a snapshot: grow the current chunk
            endrevidx = idx + 1
        else:
            # flush the accumulated chunk and start a new one at this rev
            chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
            if chunk:
                yield chunk
            startrevidx = idx
            startdata = revlog.start(r)
            endrevidx = idx + 1
        if not snapshot:
            break

    # for the others, we use binary slicing to quickly converge toward valid
    # chunks (otherwise, we might end up looking for start/end of many
    # revisions). This logic is not looking for the perfect slicing point, it
    # focuses on quickly converging toward valid chunks.
    nbitem = len(revs)
    while (enddata - startdata) > targetsize:
        endrevidx = nbitem
        if nbitem - startrevidx <= 1:
            break  # protect against individual chunk larger than limit
        localenddata = revlog.end(revs[endrevidx - 1])
        span = localenddata - startdata
        while span > targetsize:
            if endrevidx - startrevidx <= 1:
                break  # protect against individual chunk larger than limit
            # halve the candidate range until the span fits the target
            endrevidx -= (endrevidx - startrevidx) // 2
            localenddata = revlog.end(revs[endrevidx - 1])
            span = localenddata - startdata
        chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
        if chunk:
            yield chunk
        startrevidx = endrevidx
        startdata = revlog.start(revs[startrevidx])

    # emit whatever remains past the last cut
    chunk = _trimchunk(revlog, revs, startrevidx)
    if chunk:
        yield chunk
306
306
307
307
def _slicechunktodensity(revlog, revs, targetdensity=0.5, mingapsize=0):
    """slice revs to reduce the amount of unrelated data to be read from disk.

    ``revs`` is sliced into groups that should be read in one time.
    Assume that revs are sorted.

    The initial chunk is sliced until the overall density (payload/chunks-span
    ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
    skipped.

    >>> revlog = _testrevlog([
    ...     5,  #00 (5)
    ...     10, #01 (5)
    ...     12, #02 (2)
    ...     12, #03 (empty)
    ...     27, #04 (15)
    ...     31, #05 (4)
    ...     31, #06 (empty)
    ...     42, #07 (11)
    ...     47, #08 (5)
    ...     47, #09 (empty)
    ...     48, #10 (1)
    ...     51, #11 (3)
    ...     74, #12 (23)
    ...     85, #13 (11)
    ...     86, #14 (1)
    ...     91, #15 (5)
    ...     ])

    >>> list(_slicechunktodensity(revlog, list(range(16))))
    [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
    >>> list(_slicechunktodensity(revlog, [0, 15]))
    [[0], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
    [[0], [11], [15]]
    >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
    [[0], [11, 13, 15]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
    [[1, 2], [5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           mingapsize=20))
    [[1, 2, 3, 5, 8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95))
    [[1, 2], [5], [8, 10, 11], [14]]
    >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
    ...                           targetdensity=0.95, mingapsize=12))
    [[1, 2], [5, 8, 10, 11], [14]]
    """
    start = revlog.start
    length = revlog.length

    # a single rev (or nothing) is trivially dense enough
    if len(revs) <= 1:
        yield revs
        return

    deltachainspan = segmentspan(revlog, revs)

    # the whole span is smaller than the minimal gap size: no point slicing
    if deltachainspan < mingapsize:
        yield revs
        return

    readdata = deltachainspan
    chainpayload = sum(length(r) for r in revs)

    if deltachainspan:
        density = chainpayload / float(deltachainspan)
    else:
        density = 1.0

    if density >= targetdensity:
        yield revs
        return

    # Gather the gaps (with the index of the rev following each gap) so they
    # can later be processed from the largest to the smallest
    gaps = []
    prevend = None
    for i, rev in enumerate(revs):
        revstart = start(rev)
        revlen = length(rev)

        # Skip empty revisions to form larger holes
        if revlen == 0:
            continue

        if prevend is not None:
            gapsize = revstart - prevend
            # only consider holes that are large enough
            if gapsize > mingapsize:
                gaps.append((gapsize, i))

        prevend = revstart + revlen
    # sort the gaps to pop them from largest to small
    gaps.sort()

    # Collect the indices of the largest holes until the density is acceptable
    selected = []
    while gaps and density < targetdensity:
        gapsize, gapidx = gaps.pop()

        selected.append(gapidx)

        # skipping this gap removes its size from the data we would have to
        # read; recompute the density that results
        readdata -= gapsize
        if readdata > 0:
            density = chainpayload / float(readdata)
        else:
            density = 1.0
    selected.sort()

    # Cut the revs at collected indices
    previdx = 0
    for idx in selected:

        chunk = _trimchunk(revlog, revs, previdx, idx)
        if chunk:
            yield chunk

        previdx = idx

    # emit the final chunk after the last selected cut point
    chunk = _trimchunk(revlog, revs, previdx)
    if chunk:
        yield chunk
432
432
433
433
def _trimchunk(revlog, revs, startidx, endidx=None):
    """returns revs[startidx:endidx] without empty trailing revs

    Doctest Setup
    >>> revlog = _testrevlog([
    ...     5,  #0
    ...     10, #1
    ...     12, #2
    ...     12, #3 (empty)
    ...     17, #4
    ...     21, #5
    ...     21, #6 (empty)
    ...     ])

    Contiguous cases:
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
    [0, 1, 2, 3, 4, 5]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
    [0, 1, 2, 3, 4]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
    [0, 1, 2]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
    [2]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
    [3, 4, 5]
    >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
    [3, 4]

    Discontiguous cases:
    >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
    [1, 3, 5]
    >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
    [1]
    >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
    [3, 5]
    >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
    [3, 5]
    """
    if endidx is None:
        endidx = len(revs)

    # If the last rev is a non-empty delta candidate (a rev number at or past
    # len(revlog), i.e. not stored yet), there is nothing to trim.
    if revs[endidx - 1] < len(revlog):
        length = revlog.length
        # Drop trailing empty revs, but never trim past the very first
        # revision of a chain.
        while endidx > max(startidx, 1) and length(revs[endidx - 1]) == 0:
            endidx -= 1

    return revs[startidx:endidx]
486
486
487
487
def segmentspan(revlog, revs):
    """Get the byte span of a segment of revisions

    revs is a sorted array of revision numbers

    >>> revlog = _testrevlog([
    ...     5,  #0
    ...     10, #1
    ...     12, #2
    ...     12, #3 (empty)
    ...     17, #4
    ...     ])

    >>> segmentspan(revlog, [0, 1, 2, 3, 4])
    17
    >>> segmentspan(revlog, [0, 4])
    17
    >>> segmentspan(revlog, [3, 4])
    5
    >>> segmentspan(revlog, [1, 2, 3,])
    7
    >>> segmentspan(revlog, [1, 3])
    7
    """
    # an empty segment covers no bytes at all
    if not revs:
        return 0
    first_start = revlog.start(revs[0])
    last_end = revlog.end(revs[-1])
    return last_end - first_start
516
516
517
517
def _textfromdelta(fh, revlog, baserev, delta, p1, p2, flags, expectednode):
    """build full text from a (base, delta) pair and other metadata

    ``fh`` is the file handle used to read the base revision when the delta
    must actually be applied.  ``flags`` are the revision index flags and
    ``expectednode`` is used to validate the resulting text's hash.

    Raises StorageError when ``flags`` claims the revision is censored but
    processing does not confirm it, and re-raises CensoredNodeError unless
    the censored flag was passed.
    """
    # special case deltas which replace entire base; no need to decode
    # base revision. this neatly avoids censored bases, which throw when
    # they're decoded.
    hlen = struct.calcsize(b">lll")
    if delta[:hlen] == mdiff.replacediffheader(
        revlog.rawsize(baserev), len(delta) - hlen
    ):
        # the delta is a full-replacement: the text is the delta payload
        fulltext = delta[hlen:]
    else:
        # deltabase is rawtext before changed by flag processors, which is
        # equivalent to non-raw text
        basetext = revlog.revision(baserev, _df=fh)
        fulltext = mdiff.patch(basetext, delta)

    try:
        validatehash = flagutil.processflagsraw(revlog, fulltext, flags)
        if validatehash:
            revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
        if flags & REVIDX_ISCENSORED:
            # flag processing did not raise CensoredNodeError, so the
            # revision is not actually censored despite its flag
            raise error.StorageError(
                _(b'node %s is not censored') % expectednode
            )
    except error.CensoredNodeError:
        # must pass the censored index flag to add censored revisions
        if not flags & REVIDX_ISCENSORED:
            raise
    return fulltext
547
547
548
548
@attr.s(slots=True, frozen=True)
class _deltainfo:
    """Result of computing a candidate delta for a revision."""

    # on-disk span (in bytes) that must be read to restore the chain;
    # bounding it limits I/O (see isgooddeltainfo)
    distance = attr.ib()
    # size of the new delta (including any compression header byte)
    deltalen = attr.ib()
    # the delta payload as a (header, compressed-data) pair
    # (see drop_u_compression for the b'u' uncompressed marker case)
    data = attr.ib()
    # revision number this delta is computed against
    base = attr.ib()
    # first revision of the delta chain this delta would extend
    chainbase = attr.ib()
    # length of the resulting delta chain
    chainlen = attr.ib()
    # cumulated size of all deltas in the chain; bounding it limits CPU
    compresseddeltalen = attr.ib()
    # snapshot depth if the result is an intermediate snapshot, else None
    snapshotdepth = attr.ib()
559
559
560
560
def drop_u_compression(delta):
    """strip the b'u' (stored-uncompressed) marker from a delta's payload

    This is useful for revlog format that has better compression method.
    """
    header, payload = delta.data
    # only deltas explicitly stored uncompressed may be stripped
    assert header == b'u', header
    return _deltainfo(
        distance=delta.distance,
        # one byte shorter: the b'u' marker is gone
        deltalen=delta.deltalen - 1,
        data=(b'', payload),
        base=delta.base,
        chainbase=delta.chainbase,
        chainlen=delta.chainlen,
        compresseddeltalen=delta.compresseddeltalen,
        snapshotdepth=delta.snapshotdepth,
    )
577
577
578
578
def isgooddeltainfo(revlog, deltainfo, revinfo):
    """Returns True if the given delta is good. Good means that it is within
    the disk span, disk size, and chain length bounds that we know to be
    performant."""
    if deltainfo is None:
        return False

    # - 'deltainfo.distance' is the distance from the base revision --
    #   bounding it limits the amount of I/O we need to do.
    # - 'deltainfo.compresseddeltalen' is the sum of the total size of
    #   deltas we need to apply -- bounding it limits the amount of CPU
    #   we consume.
    textlen = revinfo.textlen

    # maximum accepted on-disk span; when no limit is configured we pick
    # the delta's own distance so the span check passes by construction
    span_cap = revlog._maxdeltachainspan or deltainfo.distance
    span_cap = max(span_cap, textlen * 4)

    # Bad delta from read span:
    #
    # The span of data read would be larger than the maximum allowed.
    #
    # In the sparse-revlog case, we rely on the associated "sparse reading"
    # to avoid issue related to the span of data. In theory, it would be
    # possible to build pathological revlog where delta pattern would lead
    # to too many reads. However, they do not happen in practice at all. So
    # we skip the span check entirely.
    if not revlog._sparserevlog and span_cap < deltainfo.distance:
        return False

    # Bad delta from new delta size:
    #
    # A delta larger than the target text is inefficient storage.
    if textlen < deltainfo.deltalen:
        return False

    # Bad delta from cumulated payload size:
    #
    # The sum of the chain's deltas grew past K * target text length.
    if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
        return False

    # Bad delta from chain length:
    #
    # The number of deltas in the chain got too high.
    chain_cap = revlog._maxchainlen
    if chain_cap and chain_cap < deltainfo.chainlen:
        return False

    # bad delta from intermediate snapshot size limit
    #
    # The limit exists to prevent endless chains of intermediate deltas
    # from being created.
    depth = deltainfo.snapshotdepth
    if depth is not None and (textlen >> depth) < deltainfo.deltalen:
        return False

    # bad delta if the new intermediate snapshot is larger than the
    # previous snapshot it is based on
    if depth and revlog.length(deltainfo.base) < deltainfo.deltalen:
        return False

    return True
650
650
651
651
# If a revision's full text is that much bigger than a base candidate full
# text's, it is very unlikely that it will produce a valid delta. We no longer
# consider these candidates.
LIMIT_BASE2TEXT = 500
656
656
657
657
def _candidategroups(
    revlog,
    textlen,
    p1,
    p2,
    cachedelta,
    excluded_bases=None,
    target_rev=None,
):
    """Provides group of revision to be tested as delta base

    This top level function focus on emitting groups with unique and worthwhile
    content. See _raw_candidate_groups for details about the group order.

    Used coroutine-style: each ``send(good)`` reports which revision (if
    any) from the previous group was retained and produces the next tuple
    of candidate base revisions; a final ``None`` marks exhaustion.
    """
    # should we try to build a delta?
    if not (len(revlog) and revlog._storedeltachains):
        yield None
        return

    deltalength = revlog.length
    deltaparent = revlog.deltaparent
    sparse = revlog._sparserevlog
    good = None

    deltas_limit = textlen * LIMIT_DELTA2TEXT

    # Revisions we no longer need to consider. A revision is only marked
    # "tested" once we know it is skipped or actually handed out below.
    tested = {nullrev}
    candidates = _refinedgroups(
        revlog,
        p1,
        p2,
        cachedelta,
    )
    while True:
        temptative = candidates.send(good)
        if temptative is None:
            break
        group = []
        for rev in temptative:
            # skip over empty delta (no need to include them in a chain)
            while revlog._generaldelta and not (
                rev == nullrev or rev in tested or deltalength(rev)
            ):
                tested.add(rev)
                rev = deltaparent(rev)
            # no need to try a delta against nullrev, this will be done as a
            # last resort.
            if rev == nullrev:
                continue
            # filter out revision we tested already
            if rev in tested:
                continue
            # a higher authority deemed the base unworthy (e.g. censored)
            if excluded_bases is not None and rev in excluded_bases:
                tested.add(rev)
                continue
            # We are in some recomputation cases and that rev is too high in
            # the revlog
            if target_rev is not None and rev >= target_rev:
                tested.add(rev)
                continue
            # filter out delta base that will never produce good delta
            if deltas_limit < revlog.length(rev):
                tested.add(rev)
                continue
            if sparse and revlog.rawsize(rev) < (textlen // LIMIT_BASE2TEXT):
                tested.add(rev)
                continue
            # no delta for rawtext-changing revs (see "candelta" for why)
            if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
                tested.add(rev)
                continue

            # If we reach here, we are about to build and test a delta.
            # The delta building process will compute the chaininfo in all
            # case, since that computation is cached, it is fine to access it
            # here too.
            chainlen, chainsize = revlog._chaininfo(rev)
            # if chain will be too long, skip base
            if revlog._maxchainlen and chainlen >= revlog._maxchainlen:
                tested.add(rev)
                continue
            # if chain already have too much data, skip base
            if deltas_limit < chainsize:
                tested.add(rev)
                continue
            if sparse and revlog.upperboundcomp is not None:
                maxcomp = revlog.upperboundcomp
                basenotsnap = (p1, p2, nullrev)
                if rev not in basenotsnap and revlog.issnapshot(rev):
                    snapshotdepth = revlog.snapshotdepth(rev)
                    # If text is significantly larger than the base, we can
                    # expect the resulting delta to be proportional to the size
                    # difference
                    revsize = revlog.rawsize(rev)
                    rawsizedistance = max(textlen - revsize, 0)
                    # use an estimate of the compression upper bound.
                    lowestrealisticdeltalen = rawsizedistance // maxcomp

                    # check the absolute constraint on the delta size
                    snapshotlimit = textlen >> snapshotdepth
                    if snapshotlimit < lowestrealisticdeltalen:
                        # delta lower bound is larger than accepted upper bound
                        tested.add(rev)
                        continue

                    # check the relative constraint on the delta size
                    revlength = revlog.length(rev)
                    if revlength < lowestrealisticdeltalen:
                        # delta probable lower bound is larger than target base
                        tested.add(rev)
                        continue

            group.append(rev)
        if group:
            # XXX: in the sparse revlog case, group can become large,
            # impacting performances. Some bounding or slicing mechanism
            # would help to reduce this impact.
            tested.update(group)
            good = yield tuple(group)
    yield None
770
779
771
780
def _findsnapshots(revlog, cache, start_rev):
    """find snapshot from start_rev to tip"""
    if not util.safehasattr(revlog.index, b'findsnapshots'):
        # pure-Python fallback: walk every revision from start_rev and
        # record each snapshot under its delta parent
        deltaparent = revlog.deltaparent
        issnapshot = revlog.issnapshot
        for rev in revlog.revs(start_rev):
            if issnapshot(rev):
                cache[deltaparent(rev)].append(rev)
    else:
        # the native index implementation fills the cache directly
        revlog.index.findsnapshots(cache, start_rev)
782
791
783
792
def _refinedgroups(revlog, p1, p2, cachedelta):
    """Emit candidate-base groups, refining around an accepted base.

    Generator driven through ``send()``: yields tuples of candidate base
    revisions and receives the revision actually retained (or None).
    A trailing ``None`` signals that no more candidates will be emitted.
    """
    good = None
    # First we try to reuse the delta contained in the bundle.
    # (or from the source revlog)
    #
    # This logic only applies to general delta repositories and can be disabled
    # through configuration. Disabling reuse source delta is useful when
    # we want to make sure we recomputed "optimal" deltas.
    debug_info = None
    if cachedelta and revlog._generaldelta and revlog._lazydeltabase:
        # Assume what we received from the server is a good choice
        # build delta will reuse the cache
        if debug_info is not None:
            debug_info['cached-delta.tested'] += 1
        good = yield (cachedelta[0],)
        if good is not None:
            if debug_info is not None:
                debug_info['cached-delta.accepted'] += 1
            yield None
            return
    # XXX cache me higher
    snapshots = collections.defaultdict(list)
    groups = _rawgroups(
        revlog,
        p1,
        p2,
        cachedelta,
        snapshots,
    )
    for candidates in groups:
        good = yield candidates
        if good is not None:
            break

    # If sparse revlog is enabled, we can try to refine the available deltas
    if not revlog._sparserevlog:
        yield None
        return

    # if we have a refinable value, try to refine it
    if good is not None and good not in (p1, p2) and revlog.issnapshot(good):
        # refine snapshot down: walk the delta-parent chain of the
        # accepted snapshot, offering each ancestor as a base
        previous = None
        while previous != good:
            previous = good
            base = revlog.deltaparent(good)
            if base == nullrev:
                break
            good = yield (base,)
        # refine snapshot up: offer the snapshot's children as bases
        if not snapshots:
            _findsnapshots(revlog, snapshots, good + 1)
        previous = None
        while good != previous:
            previous = good
            children = tuple(sorted(c for c in snapshots[good]))
            good = yield children

    if debug_info is not None:
        if good is None:
            debug_info['no-solution'] += 1

    yield None
856
848
857
def _rawgroups(revlog, p1, p2, cachedelta, snapshots=None):
    """Provides group of revision to be tested as delta base

    This lower level function focuses on emitting deltas that are
    theoretically interesting, without considering practical details.

    The group order aims at providing fast or small candidates first.
    """
    gdelta = revlog._generaldelta
    # gate sparse behind general-delta because of issue6056
    sparse = gdelta and revlog._sparserevlog
    curr = len(revlog)
    prev = curr - 1
    deltachain = lambda rev: revlog._deltachain(rev)[0]

    if gdelta:
        # exclude already lazy tested base if any
        parents = [p for p in (p1, p2) if p != nullrev]

        if not revlog._deltabothparents and len(parents) == 2:
            parents.sort()
            # To minimize the chance of having to build a fulltext,
            # pick first whichever parent is closest to us (max rev)
            yield (parents[1],)
            # then the other one (min rev) if the first did not fit
            yield (parents[0],)
        elif len(parents) > 0:
            # Test all parents (1 or 2), and keep the best candidate
            yield parents

    # NOTE: `parents` is only bound when gdelta is true; `sparse` implies
    # gdelta (see above), so this access is safe.
    if sparse and parents:
        if snapshots is None:
            # map: base-rev: [snapshot-revs]
            snapshots = collections.defaultdict(list)
        # See if we can use an existing snapshot in the parent chains to use as
        # a base for a new intermediate-snapshot
        #
        # search for snapshot in parents delta chain
        # map: snapshot-level: snapshot-rev
        parents_snaps = collections.defaultdict(set)
        candidate_chains = [deltachain(p) for p in parents]
        for chain in candidate_chains:
            for idx, s in enumerate(chain):
                if not revlog.issnapshot(s):
                    break
                parents_snaps[idx].add(s)
        snapfloor = min(parents_snaps[0]) + 1
        _findsnapshots(revlog, snapshots, snapfloor)
        # search for the highest "unrelated" revision
        #
        # Adding snapshots used by "unrelated" revision increase the odd we
        # reuse an independent, yet better snapshot chain.
        #
        # XXX instead of building a set of revisions, we could lazily enumerate
        # over the chains. That would be more efficient, however we stick to
        # simple code for now.
        all_revs = set()
        for chain in candidate_chains:
            all_revs.update(chain)
        other = None
        for r in revlog.revs(prev, snapfloor):
            if r not in all_revs:
                other = r
                break
        if other is not None:
            # To avoid unfair competition, we won't use unrelated intermediate
            # snapshot that are deeper than the ones from the parent delta
            # chain.
            max_depth = max(parents_snaps.keys())
            chain = deltachain(other)
            for depth, s in enumerate(chain):
                if s < snapfloor:
                    continue
                if max_depth < depth:
                    break
                if not revlog.issnapshot(s):
                    break
                parents_snaps[depth].add(s)
        # Test them as possible intermediate snapshot base
        # We test them from highest to lowest level. High level one are more
        # likely to result in small delta
        floor = None
        for idx, snaps in sorted(parents_snaps.items(), reverse=True):
            siblings = set()
            for s in snaps:
                siblings.update(snapshots[s])
            # Before considering making a new intermediate snapshot, we check
            # if an existing snapshot, children of base we consider, would be
            # suitable.
            #
            # It gives a chance to reuse a delta chain "unrelated" to the
            # current revision instead of starting our own. Without such
            # re-use, topological branches would keep reopening new chains.
            # Creating more and more snapshots as the repository grows.

            if floor is not None:
                # We only do this for siblings created after the one in our
                # parent's delta chain. Those created before have less chance
                # to be valid bases since our ancestors had to create a new
                # snapshot.
                siblings = [r for r in siblings if floor < r]
            yield tuple(sorted(siblings))
            # then test the base from our parent's delta chain.
            yield tuple(sorted(snaps))
            floor = min(snaps)
        # No suitable base found in the parent chain, search if any full
        # snapshots emitted since parent's base would be a suitable base for an
        # intermediate snapshot.
        #
        # It gives a chance to reuse a delta chain unrelated to the current
        # revisions instead of starting our own. Without such re-use,
        # topological branches would keep reopening new full chains. Creating
        # more and more snapshots as the repository grows.
        yield tuple(snapshots[nullrev])

    if not sparse:
        # other approach failed try against prev to hopefully save us a
        # fulltext.
        yield (prev,)
977
969
978
970 class deltacomputer:
979 class deltacomputer:
    def __init__(
        self,
        revlog,
        write_debug=None,
        debug_search=False,
        debug_info=None,
    ):
        """Initialize a delta computer bound to ``revlog``.

        write_debug:  optional callable used to emit debug output
        debug_search: when True (and write_debug is set), trace the
                      delta-base search itself
        debug_info:   optional mutable mapping collecting statistics
        """
        self.revlog = revlog
        self._write_debug = write_debug
        self._debug_search = debug_search
        self._debug_info = debug_info
991
    def buildtext(self, revinfo, fh):
        """Builds a fulltext version of a revision

        revinfo: revisioninfo instance that contains all needed info
        fh:      file handle to either the .i or the .d revlog file,
                 depending on whether it is inlined or not

        The result is memoized in-place in ``revinfo.btext[0]`` so
        repeated calls for the same revision are free.
        """
        btext = revinfo.btext
        if btext[0] is not None:
            # already computed for this revision
            return btext[0]

        revlog = self.revlog
        cachedelta = revinfo.cachedelta
        baserev = cachedelta[0]
        delta = cachedelta[1]

        # expand the (base, delta) pair into the fulltext and cache it
        fulltext = btext[0] = _textfromdelta(
            fh,
            revlog,
            baserev,
            delta,
            revinfo.p1,
            revinfo.p2,
            revinfo.flags,
            revinfo.node,
        )
        return fulltext
1010
1019
1011 def _builddeltadiff(self, base, revinfo, fh):
1020 def _builddeltadiff(self, base, revinfo, fh):
1012 revlog = self.revlog
1021 revlog = self.revlog
1013 t = self.buildtext(revinfo, fh)
1022 t = self.buildtext(revinfo, fh)
1014 if revlog.iscensored(base):
1023 if revlog.iscensored(base):
1015 # deltas based on a censored revision must replace the
1024 # deltas based on a censored revision must replace the
1016 # full content in one patch, so delta works everywhere
1025 # full content in one patch, so delta works everywhere
1017 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
1026 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
1018 delta = header + t
1027 delta = header + t
1019 else:
1028 else:
1020 ptext = revlog.rawdata(base, _df=fh)
1029 ptext = revlog.rawdata(base, _df=fh)
1021 delta = mdiff.textdiff(ptext, t)
1030 delta = mdiff.textdiff(ptext, t)
1022
1031
1023 return delta
1032 return delta
1024
1033
1025 def _builddeltainfo(self, revinfo, base, fh):
1034 def _builddeltainfo(self, revinfo, base, fh):
1026 # can we use the cached delta?
1035 # can we use the cached delta?
1027 revlog = self.revlog
1036 revlog = self.revlog
1028 debug_search = self._write_debug is not None and self._debug_search
1037 debug_search = self._write_debug is not None and self._debug_search
1029 chainbase = revlog.chainbase(base)
1038 chainbase = revlog.chainbase(base)
1030 if revlog._generaldelta:
1039 if revlog._generaldelta:
1031 deltabase = base
1040 deltabase = base
1032 else:
1041 else:
1033 deltabase = chainbase
1042 deltabase = chainbase
1034 snapshotdepth = None
1043 snapshotdepth = None
1035 if revlog._sparserevlog and deltabase == nullrev:
1044 if revlog._sparserevlog and deltabase == nullrev:
1036 snapshotdepth = 0
1045 snapshotdepth = 0
1037 elif revlog._sparserevlog and revlog.issnapshot(deltabase):
1046 elif revlog._sparserevlog and revlog.issnapshot(deltabase):
1038 # A delta chain should always be one full snapshot,
1047 # A delta chain should always be one full snapshot,
1039 # zero or more semi-snapshots, and zero or more deltas
1048 # zero or more semi-snapshots, and zero or more deltas
1040 p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
1049 p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
1041 if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
1050 if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
1042 snapshotdepth = len(revlog._deltachain(deltabase)[0])
1051 snapshotdepth = len(revlog._deltachain(deltabase)[0])
1043 delta = None
1052 delta = None
1044 if revinfo.cachedelta:
1053 if revinfo.cachedelta:
1045 cachebase, cachediff = revinfo.cachedelta
1054 cachebase, cachediff = revinfo.cachedelta
1046 # check if the diff still apply
1055 # check if the diff still apply
1047 currentbase = cachebase
1056 currentbase = cachebase
1048 while (
1057 while (
1049 currentbase != nullrev
1058 currentbase != nullrev
1050 and currentbase != base
1059 and currentbase != base
1051 and self.revlog.length(currentbase) == 0
1060 and self.revlog.length(currentbase) == 0
1052 ):
1061 ):
1053 currentbase = self.revlog.deltaparent(currentbase)
1062 currentbase = self.revlog.deltaparent(currentbase)
1054 if self.revlog._lazydelta and currentbase == base:
1063 if self.revlog._lazydelta and currentbase == base:
1055 delta = revinfo.cachedelta[1]
1064 delta = revinfo.cachedelta[1]
1056 if delta is None:
1065 if delta is None:
1057 delta = self._builddeltadiff(base, revinfo, fh)
1066 delta = self._builddeltadiff(base, revinfo, fh)
1058 if debug_search:
1067 if debug_search:
1059 msg = b"DBG-DELTAS-SEARCH: uncompressed-delta-size=%d\n"
1068 msg = b"DBG-DELTAS-SEARCH: uncompressed-delta-size=%d\n"
1060 msg %= len(delta)
1069 msg %= len(delta)
1061 self._write_debug(msg)
1070 self._write_debug(msg)
1062 # snapshotdept need to be neither None nor 0 level snapshot
1071 # snapshotdept need to be neither None nor 0 level snapshot
1063 if revlog.upperboundcomp is not None and snapshotdepth:
1072 if revlog.upperboundcomp is not None and snapshotdepth:
1064 lowestrealisticdeltalen = len(delta) // revlog.upperboundcomp
1073 lowestrealisticdeltalen = len(delta) // revlog.upperboundcomp
1065 snapshotlimit = revinfo.textlen >> snapshotdepth
1074 snapshotlimit = revinfo.textlen >> snapshotdepth
1066 if debug_search:
1075 if debug_search:
1067 msg = b"DBG-DELTAS-SEARCH: projected-lower-size=%d\n"
1076 msg = b"DBG-DELTAS-SEARCH: projected-lower-size=%d\n"
1068 msg %= lowestrealisticdeltalen
1077 msg %= lowestrealisticdeltalen
1069 self._write_debug(msg)
1078 self._write_debug(msg)
1070 if snapshotlimit < lowestrealisticdeltalen:
1079 if snapshotlimit < lowestrealisticdeltalen:
1071 if debug_search:
1080 if debug_search:
1072 msg = b"DBG-DELTAS-SEARCH: DISCARDED (snapshot limit)\n"
1081 msg = b"DBG-DELTAS-SEARCH: DISCARDED (snapshot limit)\n"
1073 self._write_debug(msg)
1082 self._write_debug(msg)
1074 return None
1083 return None
1075 if revlog.length(base) < lowestrealisticdeltalen:
1084 if revlog.length(base) < lowestrealisticdeltalen:
1076 if debug_search:
1085 if debug_search:
1077 msg = b"DBG-DELTAS-SEARCH: DISCARDED (prev size)\n"
1086 msg = b"DBG-DELTAS-SEARCH: DISCARDED (prev size)\n"
1078 self._write_debug(msg)
1087 self._write_debug(msg)
1079 return None
1088 return None
1080 header, data = revlog.compress(delta)
1089 header, data = revlog.compress(delta)
1081 deltalen = len(header) + len(data)
1090 deltalen = len(header) + len(data)
1082 offset = revlog.end(len(revlog) - 1)
1091 offset = revlog.end(len(revlog) - 1)
1083 dist = deltalen + offset - revlog.start(chainbase)
1092 dist = deltalen + offset - revlog.start(chainbase)
1084 chainlen, compresseddeltalen = revlog._chaininfo(base)
1093 chainlen, compresseddeltalen = revlog._chaininfo(base)
1085 chainlen += 1
1094 chainlen += 1
1086 compresseddeltalen += deltalen
1095 compresseddeltalen += deltalen
1087
1096
1088 return _deltainfo(
1097 return _deltainfo(
1089 dist,
1098 dist,
1090 deltalen,
1099 deltalen,
1091 (header, data),
1100 (header, data),
1092 deltabase,
1101 deltabase,
1093 chainbase,
1102 chainbase,
1094 chainlen,
1103 chainlen,
1095 compresseddeltalen,
1104 compresseddeltalen,
1096 snapshotdepth,
1105 snapshotdepth,
1097 )
1106 )
1098
1107
1099 def _fullsnapshotinfo(self, fh, revinfo, curr):
1108 def _fullsnapshotinfo(self, fh, revinfo, curr):
1100 rawtext = self.buildtext(revinfo, fh)
1109 rawtext = self.buildtext(revinfo, fh)
1101 data = self.revlog.compress(rawtext)
1110 data = self.revlog.compress(rawtext)
1102 compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
1111 compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
1103 deltabase = chainbase = curr
1112 deltabase = chainbase = curr
1104 snapshotdepth = 0
1113 snapshotdepth = 0
1105 chainlen = 1
1114 chainlen = 1
1106
1115
1107 return _deltainfo(
1116 return _deltainfo(
1108 dist,
1117 dist,
1109 deltalen,
1118 deltalen,
1110 data,
1119 data,
1111 deltabase,
1120 deltabase,
1112 chainbase,
1121 chainbase,
1113 chainlen,
1122 chainlen,
1114 compresseddeltalen,
1123 compresseddeltalen,
1115 snapshotdepth,
1124 snapshotdepth,
1116 )
1125 )
1117
1126
1118 def finddeltainfo(self, revinfo, fh, excluded_bases=None, target_rev=None):
1127 def finddeltainfo(self, revinfo, fh, excluded_bases=None, target_rev=None):
1119 """Find an acceptable delta against a candidate revision
1128 """Find an acceptable delta against a candidate revision
1120
1129
1121 revinfo: information about the revision (instance of _revisioninfo)
1130 revinfo: information about the revision (instance of _revisioninfo)
1122 fh: file handle to either the .i or the .d revlog file,
1131 fh: file handle to either the .i or the .d revlog file,
1123 depending on whether it is inlined or not
1132 depending on whether it is inlined or not
1124
1133
1125 Returns the first acceptable candidate revision, as ordered by
1134 Returns the first acceptable candidate revision, as ordered by
1126 _candidategroups
1135 _candidategroups
1127
1136
1128 If no suitable deltabase is found, we return delta info for a full
1137 If no suitable deltabase is found, we return delta info for a full
1129 snapshot.
1138 snapshot.
1130
1139
1131 `excluded_bases` is an optional set of revision that cannot be used as
1140 `excluded_bases` is an optional set of revision that cannot be used as
1132 a delta base. Use this to recompute delta suitable in censor or strip
1141 a delta base. Use this to recompute delta suitable in censor or strip
1133 context.
1142 context.
1134 """
1143 """
1135 if target_rev is None:
1144 if target_rev is None:
1136 target_rev = len(self.revlog)
1145 target_rev = len(self.revlog)
1137
1146
1138 if not revinfo.textlen:
1147 if not revinfo.textlen:
1139 return self._fullsnapshotinfo(fh, revinfo, target_rev)
1148 return self._fullsnapshotinfo(fh, revinfo, target_rev)
1140
1149
1141 if excluded_bases is None:
1150 if excluded_bases is None:
1142 excluded_bases = set()
1151 excluded_bases = set()
1143
1152
1144 # no delta for flag processor revision (see "candelta" for why)
1153 # no delta for flag processor revision (see "candelta" for why)
1145 # not calling candelta since only one revision needs test, also to
1154 # not calling candelta since only one revision needs test, also to
1146 # avoid overhead fetching flags again.
1155 # avoid overhead fetching flags again.
1147 if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
1156 if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
1148 return self._fullsnapshotinfo(fh, revinfo, target_rev)
1157 return self._fullsnapshotinfo(fh, revinfo, target_rev)
1149
1158
1150 gather_debug = (
1159 gather_debug = (
1151 self._write_debug is not None or self._debug_info is not None
1160 self._write_debug is not None or self._debug_info is not None
1152 )
1161 )
1153 debug_search = self._write_debug is not None and self._debug_search
1162 debug_search = self._write_debug is not None and self._debug_search
1154
1163
1155 if gather_debug:
1164 if gather_debug:
1156 start = util.timer()
1165 start = util.timer()
1157
1166
1158 # count the number of different delta we tried (for debug purpose)
1167 # count the number of different delta we tried (for debug purpose)
1159 dbg_try_count = 0
1168 dbg_try_count = 0
1160 # count the number of "search round" we did. (for debug purpose)
1169 # count the number of "search round" we did. (for debug purpose)
1161 dbg_try_rounds = 0
1170 dbg_try_rounds = 0
1162 dbg_type = b'unknown'
1171 dbg_type = b'unknown'
1163
1172
1164 cachedelta = revinfo.cachedelta
1173 cachedelta = revinfo.cachedelta
1165 p1 = revinfo.p1
1174 p1 = revinfo.p1
1166 p2 = revinfo.p2
1175 p2 = revinfo.p2
1167 revlog = self.revlog
1176 revlog = self.revlog
1168
1177
1169 deltainfo = None
1178 deltainfo = None
1170 p1r, p2r = revlog.rev(p1), revlog.rev(p2)
1179 p1r, p2r = revlog.rev(p1), revlog.rev(p2)
1171
1180
1172 if gather_debug:
1181 if gather_debug:
1173 if p1r != nullrev:
1182 if p1r != nullrev:
1174 p1_chain_len = revlog._chaininfo(p1r)[0]
1183 p1_chain_len = revlog._chaininfo(p1r)[0]
1175 else:
1184 else:
1176 p1_chain_len = -1
1185 p1_chain_len = -1
1177 if p2r != nullrev:
1186 if p2r != nullrev:
1178 p2_chain_len = revlog._chaininfo(p2r)[0]
1187 p2_chain_len = revlog._chaininfo(p2r)[0]
1179 else:
1188 else:
1180 p2_chain_len = -1
1189 p2_chain_len = -1
1181 if debug_search:
1190 if debug_search:
1182 msg = b"DBG-DELTAS-SEARCH: SEARCH rev=%d\n"
1191 msg = b"DBG-DELTAS-SEARCH: SEARCH rev=%d\n"
1183 msg %= target_rev
1192 msg %= target_rev
1184 self._write_debug(msg)
1193 self._write_debug(msg)
1185
1194
1186 groups = _candidategroups(
1195 groups = _candidategroups(
1187 self.revlog,
1196 self.revlog,
1188 revinfo.textlen,
1197 revinfo.textlen,
1189 p1r,
1198 p1r,
1190 p2r,
1199 p2r,
1191 cachedelta,
1200 cachedelta,
1192 excluded_bases,
1201 excluded_bases,
1193 target_rev,
1202 target_rev,
1194 )
1203 )
1195 candidaterevs = next(groups)
1204 candidaterevs = next(groups)
1196 while candidaterevs is not None:
1205 while candidaterevs is not None:
1197 dbg_try_rounds += 1
1206 dbg_try_rounds += 1
1198 if debug_search:
1207 if debug_search:
1199 prev = None
1208 prev = None
1200 if deltainfo is not None:
1209 if deltainfo is not None:
1201 prev = deltainfo.base
1210 prev = deltainfo.base
1202
1211
1203 if (
1212 if (
1204 cachedelta is not None
1213 cachedelta is not None
1205 and len(candidaterevs) == 1
1214 and len(candidaterevs) == 1
1206 and cachedelta[0] in candidaterevs
1215 and cachedelta[0] in candidaterevs
1207 ):
1216 ):
1208 round_type = b"cached-delta"
1217 round_type = b"cached-delta"
1209 elif p1 in candidaterevs or p2 in candidaterevs:
1218 elif p1 in candidaterevs or p2 in candidaterevs:
1210 round_type = b"parents"
1219 round_type = b"parents"
1211 elif prev is not None and all(c < prev for c in candidaterevs):
1220 elif prev is not None and all(c < prev for c in candidaterevs):
1212 round_type = b"refine-down"
1221 round_type = b"refine-down"
1213 elif prev is not None and all(c > prev for c in candidaterevs):
1222 elif prev is not None and all(c > prev for c in candidaterevs):
1214 round_type = b"refine-up"
1223 round_type = b"refine-up"
1215 else:
1224 else:
1216 round_type = b"search-down"
1225 round_type = b"search-down"
1217 msg = b"DBG-DELTAS-SEARCH: ROUND #%d - %d candidates - %s\n"
1226 msg = b"DBG-DELTAS-SEARCH: ROUND #%d - %d candidates - %s\n"
1218 msg %= (dbg_try_rounds, len(candidaterevs), round_type)
1227 msg %= (dbg_try_rounds, len(candidaterevs), round_type)
1219 self._write_debug(msg)
1228 self._write_debug(msg)
1220 nominateddeltas = []
1229 nominateddeltas = []
1221 if deltainfo is not None:
1230 if deltainfo is not None:
1222 if debug_search:
1231 if debug_search:
1223 msg = (
1232 msg = (
1224 b"DBG-DELTAS-SEARCH: CONTENDER: rev=%d - length=%d\n"
1233 b"DBG-DELTAS-SEARCH: CONTENDER: rev=%d - length=%d\n"
1225 )
1234 )
1226 msg %= (deltainfo.base, deltainfo.deltalen)
1235 msg %= (deltainfo.base, deltainfo.deltalen)
1227 self._write_debug(msg)
1236 self._write_debug(msg)
1228 # if we already found a good delta,
1237 # if we already found a good delta,
1229 # challenge it against refined candidates
1238 # challenge it against refined candidates
1230 nominateddeltas.append(deltainfo)
1239 nominateddeltas.append(deltainfo)
1231 for candidaterev in candidaterevs:
1240 for candidaterev in candidaterevs:
1232 if debug_search:
1241 if debug_search:
1233 msg = b"DBG-DELTAS-SEARCH: CANDIDATE: rev=%d\n"
1242 msg = b"DBG-DELTAS-SEARCH: CANDIDATE: rev=%d\n"
1234 msg %= candidaterev
1243 msg %= candidaterev
1235 self._write_debug(msg)
1244 self._write_debug(msg)
1236 candidate_type = None
1245 candidate_type = None
1237 if candidaterev == p1:
1246 if candidaterev == p1:
1238 candidate_type = b"p1"
1247 candidate_type = b"p1"
1239 elif candidaterev == p2:
1248 elif candidaterev == p2:
1240 candidate_type = b"p2"
1249 candidate_type = b"p2"
1241 elif self.revlog.issnapshot(candidaterev):
1250 elif self.revlog.issnapshot(candidaterev):
1242 candidate_type = b"snapshot-%d"
1251 candidate_type = b"snapshot-%d"
1243 candidate_type %= self.revlog.snapshotdepth(
1252 candidate_type %= self.revlog.snapshotdepth(
1244 candidaterev
1253 candidaterev
1245 )
1254 )
1246
1255
1247 if candidate_type is not None:
1256 if candidate_type is not None:
1248 msg = b"DBG-DELTAS-SEARCH: type=%s\n"
1257 msg = b"DBG-DELTAS-SEARCH: type=%s\n"
1249 msg %= candidate_type
1258 msg %= candidate_type
1250 self._write_debug(msg)
1259 self._write_debug(msg)
1251 msg = b"DBG-DELTAS-SEARCH: size=%d\n"
1260 msg = b"DBG-DELTAS-SEARCH: size=%d\n"
1252 msg %= self.revlog.length(candidaterev)
1261 msg %= self.revlog.length(candidaterev)
1253 self._write_debug(msg)
1262 self._write_debug(msg)
1254 msg = b"DBG-DELTAS-SEARCH: base=%d\n"
1263 msg = b"DBG-DELTAS-SEARCH: base=%d\n"
1255 msg %= self.revlog.deltaparent(candidaterev)
1264 msg %= self.revlog.deltaparent(candidaterev)
1256 self._write_debug(msg)
1265 self._write_debug(msg)
1257
1266
1258 dbg_try_count += 1
1267 dbg_try_count += 1
1259
1268
1260 if debug_search:
1269 if debug_search:
1261 delta_start = util.timer()
1270 delta_start = util.timer()
1262 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
1271 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
1263 if debug_search:
1272 if debug_search:
1264 delta_end = util.timer()
1273 delta_end = util.timer()
1265 msg = b"DBG-DELTAS-SEARCH: delta-search-time=%f\n"
1274 msg = b"DBG-DELTAS-SEARCH: delta-search-time=%f\n"
1266 msg %= delta_end - delta_start
1275 msg %= delta_end - delta_start
1267 self._write_debug(msg)
1276 self._write_debug(msg)
1268 if candidatedelta is not None:
1277 if candidatedelta is not None:
1269 if isgooddeltainfo(self.revlog, candidatedelta, revinfo):
1278 if isgooddeltainfo(self.revlog, candidatedelta, revinfo):
1270 if debug_search:
1279 if debug_search:
1271 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (GOOD)\n"
1280 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (GOOD)\n"
1272 msg %= candidatedelta.deltalen
1281 msg %= candidatedelta.deltalen
1273 self._write_debug(msg)
1282 self._write_debug(msg)
1274 nominateddeltas.append(candidatedelta)
1283 nominateddeltas.append(candidatedelta)
1275 elif debug_search:
1284 elif debug_search:
1276 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (BAD)\n"
1285 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (BAD)\n"
1277 msg %= candidatedelta.deltalen
1286 msg %= candidatedelta.deltalen
1278 self._write_debug(msg)
1287 self._write_debug(msg)
1279 elif debug_search:
1288 elif debug_search:
1280 msg = b"DBG-DELTAS-SEARCH: NO-DELTA\n"
1289 msg = b"DBG-DELTAS-SEARCH: NO-DELTA\n"
1281 self._write_debug(msg)
1290 self._write_debug(msg)
1282 if nominateddeltas:
1291 if nominateddeltas:
1283 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
1292 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
1284 if deltainfo is not None:
1293 if deltainfo is not None:
1285 candidaterevs = groups.send(deltainfo.base)
1294 candidaterevs = groups.send(deltainfo.base)
1286 else:
1295 else:
1287 candidaterevs = next(groups)
1296 candidaterevs = next(groups)
1288
1297
1289 if deltainfo is None:
1298 if deltainfo is None:
1290 dbg_type = b"full"
1299 dbg_type = b"full"
1291 deltainfo = self._fullsnapshotinfo(fh, revinfo, target_rev)
1300 deltainfo = self._fullsnapshotinfo(fh, revinfo, target_rev)
1292 elif deltainfo.snapshotdepth: # pytype: disable=attribute-error
1301 elif deltainfo.snapshotdepth: # pytype: disable=attribute-error
1293 dbg_type = b"snapshot"
1302 dbg_type = b"snapshot"
1294 else:
1303 else:
1295 dbg_type = b"delta"
1304 dbg_type = b"delta"
1296
1305
1297 if gather_debug:
1306 if gather_debug:
1298 end = util.timer()
1307 end = util.timer()
1299 used_cached = (
1308 used_cached = (
1300 cachedelta is not None
1309 cachedelta is not None
1301 and dbg_try_rounds == 1
1310 and dbg_try_rounds == 1
1302 and dbg_try_count == 1
1311 and dbg_try_count == 1
1303 and deltainfo.base == cachedelta[0]
1312 and deltainfo.base == cachedelta[0]
1304 )
1313 )
1305 dbg = {
1314 dbg = {
1306 'duration': end - start,
1315 'duration': end - start,
1307 'revision': target_rev,
1316 'revision': target_rev,
1308 'delta-base': deltainfo.base, # pytype: disable=attribute-error
1317 'delta-base': deltainfo.base, # pytype: disable=attribute-error
1309 'search_round_count': dbg_try_rounds,
1318 'search_round_count': dbg_try_rounds,
1310 'using-cached-base': used_cached,
1319 'using-cached-base': used_cached,
1311 'delta_try_count': dbg_try_count,
1320 'delta_try_count': dbg_try_count,
1312 'type': dbg_type,
1321 'type': dbg_type,
1313 'p1-chain-len': p1_chain_len,
1322 'p1-chain-len': p1_chain_len,
1314 'p2-chain-len': p2_chain_len,
1323 'p2-chain-len': p2_chain_len,
1315 }
1324 }
1316 if (
1325 if (
1317 deltainfo.snapshotdepth # pytype: disable=attribute-error
1326 deltainfo.snapshotdepth # pytype: disable=attribute-error
1318 is not None
1327 is not None
1319 ):
1328 ):
1320 dbg[
1329 dbg[
1321 'snapshot-depth'
1330 'snapshot-depth'
1322 ] = deltainfo.snapshotdepth # pytype: disable=attribute-error
1331 ] = deltainfo.snapshotdepth # pytype: disable=attribute-error
1323 else:
1332 else:
1324 dbg['snapshot-depth'] = 0
1333 dbg['snapshot-depth'] = 0
1325 target_revlog = b"UNKNOWN"
1334 target_revlog = b"UNKNOWN"
1326 target_type = self.revlog.target[0]
1335 target_type = self.revlog.target[0]
1327 target_key = self.revlog.target[1]
1336 target_key = self.revlog.target[1]
1328 if target_type == KIND_CHANGELOG:
1337 if target_type == KIND_CHANGELOG:
1329 target_revlog = b'CHANGELOG:'
1338 target_revlog = b'CHANGELOG:'
1330 elif target_type == KIND_MANIFESTLOG:
1339 elif target_type == KIND_MANIFESTLOG:
1331 target_revlog = b'MANIFESTLOG:'
1340 target_revlog = b'MANIFESTLOG:'
1332 if target_key:
1341 if target_key:
1333 target_revlog += b'%s:' % target_key
1342 target_revlog += b'%s:' % target_key
1334 elif target_type == KIND_FILELOG:
1343 elif target_type == KIND_FILELOG:
1335 target_revlog = b'FILELOG:'
1344 target_revlog = b'FILELOG:'
1336 if target_key:
1345 if target_key:
1337 target_revlog += b'%s:' % target_key
1346 target_revlog += b'%s:' % target_key
1338 dbg['target-revlog'] = target_revlog
1347 dbg['target-revlog'] = target_revlog
1339
1348
1340 if self._debug_info is not None:
1349 if self._debug_info is not None:
1341 self._debug_info.append(dbg)
1350 self._debug_info.append(dbg)
1342
1351
1343 if self._write_debug is not None:
1352 if self._write_debug is not None:
1344 msg = (
1353 msg = (
1345 b"DBG-DELTAS:"
1354 b"DBG-DELTAS:"
1346 b" %-12s"
1355 b" %-12s"
1347 b" rev=%d:"
1356 b" rev=%d:"
1348 b" delta-base=%d"
1357 b" delta-base=%d"
1349 b" is-cached=%d"
1358 b" is-cached=%d"
1350 b" - search-rounds=%d"
1359 b" - search-rounds=%d"
1351 b" try-count=%d"
1360 b" try-count=%d"
1352 b" - delta-type=%-6s"
1361 b" - delta-type=%-6s"
1353 b" snap-depth=%d"
1362 b" snap-depth=%d"
1354 b" - p1-chain-length=%d"
1363 b" - p1-chain-length=%d"
1355 b" p2-chain-length=%d"
1364 b" p2-chain-length=%d"
1356 b" - duration=%f"
1365 b" - duration=%f"
1357 b"\n"
1366 b"\n"
1358 )
1367 )
1359 msg %= (
1368 msg %= (
1360 dbg["target-revlog"],
1369 dbg["target-revlog"],
1361 dbg["revision"],
1370 dbg["revision"],
1362 dbg["delta-base"],
1371 dbg["delta-base"],
1363 dbg["using-cached-base"],
1372 dbg["using-cached-base"],
1364 dbg["search_round_count"],
1373 dbg["search_round_count"],
1365 dbg["delta_try_count"],
1374 dbg["delta_try_count"],
1366 dbg["type"],
1375 dbg["type"],
1367 dbg["snapshot-depth"],
1376 dbg["snapshot-depth"],
1368 dbg["p1-chain-len"],
1377 dbg["p1-chain-len"],
1369 dbg["p2-chain-len"],
1378 dbg["p2-chain-len"],
1370 dbg["duration"],
1379 dbg["duration"],
1371 )
1380 )
1372 self._write_debug(msg)
1381 self._write_debug(msg)
1373 return deltainfo
1382 return deltainfo
1374
1383
1375
1384
1376 def delta_compression(default_compression_header, deltainfo):
1385 def delta_compression(default_compression_header, deltainfo):
1377 """return (COMPRESSION_MODE, deltainfo)
1386 """return (COMPRESSION_MODE, deltainfo)
1378
1387
1379 used by revlog v2+ format to dispatch between PLAIN and DEFAULT
1388 used by revlog v2+ format to dispatch between PLAIN and DEFAULT
1380 compression.
1389 compression.
1381 """
1390 """
1382 h, d = deltainfo.data
1391 h, d = deltainfo.data
1383 compression_mode = COMP_MODE_INLINE
1392 compression_mode = COMP_MODE_INLINE
1384 if not h and not d:
1393 if not h and not d:
1385 # not data to store at all... declare them uncompressed
1394 # not data to store at all... declare them uncompressed
1386 compression_mode = COMP_MODE_PLAIN
1395 compression_mode = COMP_MODE_PLAIN
1387 elif not h:
1396 elif not h:
1388 t = d[0:1]
1397 t = d[0:1]
1389 if t == b'\0':
1398 if t == b'\0':
1390 compression_mode = COMP_MODE_PLAIN
1399 compression_mode = COMP_MODE_PLAIN
1391 elif t == default_compression_header:
1400 elif t == default_compression_header:
1392 compression_mode = COMP_MODE_DEFAULT
1401 compression_mode = COMP_MODE_DEFAULT
1393 elif h == b'u':
1402 elif h == b'u':
1394 # we have a more efficient way to declare uncompressed
1403 # we have a more efficient way to declare uncompressed
1395 h = b''
1404 h = b''
1396 compression_mode = COMP_MODE_PLAIN
1405 compression_mode = COMP_MODE_PLAIN
1397 deltainfo = drop_u_compression(deltainfo)
1406 deltainfo = drop_u_compression(deltainfo)
1398 return compression_mode, deltainfo
1407 return compression_mode, deltainfo
General Comments 0
You need to be logged in to leave comments. Login now