##// END OF EJS Templates
delta-find: small documentation update...
marmoute -
r50509:5447c150 default
parent child Browse files
Show More
@@ -1,1398 +1,1398 b''
1 # revlogdeltas.py - Logic around delta computation for revlog
1 # revlogdeltas.py - Logic around delta computation for revlog
2 #
2 #
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 # Copyright 2018 Octobus <contact@octobus.net>
4 # Copyright 2018 Octobus <contact@octobus.net>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8 """Helper class to compute deltas stored inside revlogs"""
8 """Helper class to compute deltas stored inside revlogs"""
9
9
10
10
11 import collections
11 import collections
12 import struct
12 import struct
13
13
14 # import stuff from node for others to import from revlog
14 # import stuff from node for others to import from revlog
15 from ..node import nullrev
15 from ..node import nullrev
16 from ..i18n import _
16 from ..i18n import _
17 from ..pycompat import getattr
17 from ..pycompat import getattr
18
18
19 from .constants import (
19 from .constants import (
20 COMP_MODE_DEFAULT,
20 COMP_MODE_DEFAULT,
21 COMP_MODE_INLINE,
21 COMP_MODE_INLINE,
22 COMP_MODE_PLAIN,
22 COMP_MODE_PLAIN,
23 KIND_CHANGELOG,
23 KIND_CHANGELOG,
24 KIND_FILELOG,
24 KIND_FILELOG,
25 KIND_MANIFESTLOG,
25 KIND_MANIFESTLOG,
26 REVIDX_ISCENSORED,
26 REVIDX_ISCENSORED,
27 REVIDX_RAWTEXT_CHANGING_FLAGS,
27 REVIDX_RAWTEXT_CHANGING_FLAGS,
28 )
28 )
29
29
30 from ..thirdparty import attr
30 from ..thirdparty import attr
31
31
32 from .. import (
32 from .. import (
33 error,
33 error,
34 mdiff,
34 mdiff,
35 util,
35 util,
36 )
36 )
37
37
38 from . import flagutil
38 from . import flagutil
39
39
40 # maximum <delta-chain-data>/<revision-text-length> ratio
40 # maximum <delta-chain-data>/<revision-text-length> ratio
41 LIMIT_DELTA2TEXT = 2
41 LIMIT_DELTA2TEXT = 2
42
42
43
43
44 class _testrevlog:
44 class _testrevlog:
45 """minimalist fake revlog to use in doctests"""
45 """minimalist fake revlog to use in doctests"""
46
46
47 def __init__(self, data, density=0.5, mingap=0, snapshot=()):
47 def __init__(self, data, density=0.5, mingap=0, snapshot=()):
48 """data is an list of revision payload boundaries"""
48 """data is an list of revision payload boundaries"""
49 self._data = data
49 self._data = data
50 self._srdensitythreshold = density
50 self._srdensitythreshold = density
51 self._srmingapsize = mingap
51 self._srmingapsize = mingap
52 self._snapshot = set(snapshot)
52 self._snapshot = set(snapshot)
53 self.index = None
53 self.index = None
54
54
55 def start(self, rev):
55 def start(self, rev):
56 if rev == nullrev:
56 if rev == nullrev:
57 return 0
57 return 0
58 if rev == 0:
58 if rev == 0:
59 return 0
59 return 0
60 return self._data[rev - 1]
60 return self._data[rev - 1]
61
61
62 def end(self, rev):
62 def end(self, rev):
63 if rev == nullrev:
63 if rev == nullrev:
64 return 0
64 return 0
65 return self._data[rev]
65 return self._data[rev]
66
66
67 def length(self, rev):
67 def length(self, rev):
68 return self.end(rev) - self.start(rev)
68 return self.end(rev) - self.start(rev)
69
69
70 def __len__(self):
70 def __len__(self):
71 return len(self._data)
71 return len(self._data)
72
72
73 def issnapshot(self, rev):
73 def issnapshot(self, rev):
74 if rev == nullrev:
74 if rev == nullrev:
75 return True
75 return True
76 return rev in self._snapshot
76 return rev in self._snapshot
77
77
78
78
79 def slicechunk(revlog, revs, targetsize=None):
79 def slicechunk(revlog, revs, targetsize=None):
80 """slice revs to reduce the amount of unrelated data to be read from disk.
80 """slice revs to reduce the amount of unrelated data to be read from disk.
81
81
82 ``revs`` is sliced into groups that should be read in one time.
82 ``revs`` is sliced into groups that should be read in one time.
83 Assume that revs are sorted.
83 Assume that revs are sorted.
84
84
85 The initial chunk is sliced until the overall density (payload/chunks-span
85 The initial chunk is sliced until the overall density (payload/chunks-span
86 ratio) is above `revlog._srdensitythreshold`. No gap smaller than
86 ratio) is above `revlog._srdensitythreshold`. No gap smaller than
87 `revlog._srmingapsize` is skipped.
87 `revlog._srmingapsize` is skipped.
88
88
89 If `targetsize` is set, no chunk larger than `targetsize` will be yield.
89 If `targetsize` is set, no chunk larger than `targetsize` will be yield.
90 For consistency with other slicing choice, this limit won't go lower than
90 For consistency with other slicing choice, this limit won't go lower than
91 `revlog._srmingapsize`.
91 `revlog._srmingapsize`.
92
92
93 If individual revisions chunk are larger than this limit, they will still
93 If individual revisions chunk are larger than this limit, they will still
94 be raised individually.
94 be raised individually.
95
95
96 >>> data = [
96 >>> data = [
97 ... 5, #00 (5)
97 ... 5, #00 (5)
98 ... 10, #01 (5)
98 ... 10, #01 (5)
99 ... 12, #02 (2)
99 ... 12, #02 (2)
100 ... 12, #03 (empty)
100 ... 12, #03 (empty)
101 ... 27, #04 (15)
101 ... 27, #04 (15)
102 ... 31, #05 (4)
102 ... 31, #05 (4)
103 ... 31, #06 (empty)
103 ... 31, #06 (empty)
104 ... 42, #07 (11)
104 ... 42, #07 (11)
105 ... 47, #08 (5)
105 ... 47, #08 (5)
106 ... 47, #09 (empty)
106 ... 47, #09 (empty)
107 ... 48, #10 (1)
107 ... 48, #10 (1)
108 ... 51, #11 (3)
108 ... 51, #11 (3)
109 ... 74, #12 (23)
109 ... 74, #12 (23)
110 ... 85, #13 (11)
110 ... 85, #13 (11)
111 ... 86, #14 (1)
111 ... 86, #14 (1)
112 ... 91, #15 (5)
112 ... 91, #15 (5)
113 ... ]
113 ... ]
114 >>> revlog = _testrevlog(data, snapshot=range(16))
114 >>> revlog = _testrevlog(data, snapshot=range(16))
115
115
116 >>> list(slicechunk(revlog, list(range(16))))
116 >>> list(slicechunk(revlog, list(range(16))))
117 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
117 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
118 >>> list(slicechunk(revlog, [0, 15]))
118 >>> list(slicechunk(revlog, [0, 15]))
119 [[0], [15]]
119 [[0], [15]]
120 >>> list(slicechunk(revlog, [0, 11, 15]))
120 >>> list(slicechunk(revlog, [0, 11, 15]))
121 [[0], [11], [15]]
121 [[0], [11], [15]]
122 >>> list(slicechunk(revlog, [0, 11, 13, 15]))
122 >>> list(slicechunk(revlog, [0, 11, 13, 15]))
123 [[0], [11, 13, 15]]
123 [[0], [11, 13, 15]]
124 >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
124 >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
125 [[1, 2], [5, 8, 10, 11], [14]]
125 [[1, 2], [5, 8, 10, 11], [14]]
126
126
127 Slicing with a maximum chunk size
127 Slicing with a maximum chunk size
128 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
128 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
129 [[0], [11], [13], [15]]
129 [[0], [11], [13], [15]]
130 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
130 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
131 [[0], [11], [13, 15]]
131 [[0], [11], [13, 15]]
132
132
133 Slicing involving nullrev
133 Slicing involving nullrev
134 >>> list(slicechunk(revlog, [-1, 0, 11, 13, 15], targetsize=20))
134 >>> list(slicechunk(revlog, [-1, 0, 11, 13, 15], targetsize=20))
135 [[-1, 0], [11], [13, 15]]
135 [[-1, 0], [11], [13, 15]]
136 >>> list(slicechunk(revlog, [-1, 13, 15], targetsize=5))
136 >>> list(slicechunk(revlog, [-1, 13, 15], targetsize=5))
137 [[-1], [13], [15]]
137 [[-1], [13], [15]]
138 """
138 """
139 if targetsize is not None:
139 if targetsize is not None:
140 targetsize = max(targetsize, revlog._srmingapsize)
140 targetsize = max(targetsize, revlog._srmingapsize)
141 # targetsize should not be specified when evaluating delta candidates:
141 # targetsize should not be specified when evaluating delta candidates:
142 # * targetsize is used to ensure we stay within specification when reading,
142 # * targetsize is used to ensure we stay within specification when reading,
143 densityslicing = getattr(revlog.index, 'slicechunktodensity', None)
143 densityslicing = getattr(revlog.index, 'slicechunktodensity', None)
144 if densityslicing is None:
144 if densityslicing is None:
145 densityslicing = lambda x, y, z: _slicechunktodensity(revlog, x, y, z)
145 densityslicing = lambda x, y, z: _slicechunktodensity(revlog, x, y, z)
146 for chunk in densityslicing(
146 for chunk in densityslicing(
147 revs, revlog._srdensitythreshold, revlog._srmingapsize
147 revs, revlog._srdensitythreshold, revlog._srmingapsize
148 ):
148 ):
149 for subchunk in _slicechunktosize(revlog, chunk, targetsize):
149 for subchunk in _slicechunktosize(revlog, chunk, targetsize):
150 yield subchunk
150 yield subchunk
151
151
152
152
153 def _slicechunktosize(revlog, revs, targetsize=None):
153 def _slicechunktosize(revlog, revs, targetsize=None):
154 """slice revs to match the target size
154 """slice revs to match the target size
155
155
156 This is intended to be used on chunk that density slicing selected by that
156 This is intended to be used on chunk that density slicing selected by that
157 are still too large compared to the read garantee of revlog. This might
157 are still too large compared to the read garantee of revlog. This might
158 happens when "minimal gap size" interrupted the slicing or when chain are
158 happens when "minimal gap size" interrupted the slicing or when chain are
159 built in a way that create large blocks next to each other.
159 built in a way that create large blocks next to each other.
160
160
161 >>> data = [
161 >>> data = [
162 ... 3, #0 (3)
162 ... 3, #0 (3)
163 ... 5, #1 (2)
163 ... 5, #1 (2)
164 ... 6, #2 (1)
164 ... 6, #2 (1)
165 ... 8, #3 (2)
165 ... 8, #3 (2)
166 ... 8, #4 (empty)
166 ... 8, #4 (empty)
167 ... 11, #5 (3)
167 ... 11, #5 (3)
168 ... 12, #6 (1)
168 ... 12, #6 (1)
169 ... 13, #7 (1)
169 ... 13, #7 (1)
170 ... 14, #8 (1)
170 ... 14, #8 (1)
171 ... ]
171 ... ]
172
172
173 == All snapshots cases ==
173 == All snapshots cases ==
174 >>> revlog = _testrevlog(data, snapshot=range(9))
174 >>> revlog = _testrevlog(data, snapshot=range(9))
175
175
176 Cases where chunk is already small enough
176 Cases where chunk is already small enough
177 >>> list(_slicechunktosize(revlog, [0], 3))
177 >>> list(_slicechunktosize(revlog, [0], 3))
178 [[0]]
178 [[0]]
179 >>> list(_slicechunktosize(revlog, [6, 7], 3))
179 >>> list(_slicechunktosize(revlog, [6, 7], 3))
180 [[6, 7]]
180 [[6, 7]]
181 >>> list(_slicechunktosize(revlog, [0], None))
181 >>> list(_slicechunktosize(revlog, [0], None))
182 [[0]]
182 [[0]]
183 >>> list(_slicechunktosize(revlog, [6, 7], None))
183 >>> list(_slicechunktosize(revlog, [6, 7], None))
184 [[6, 7]]
184 [[6, 7]]
185
185
186 cases where we need actual slicing
186 cases where we need actual slicing
187 >>> list(_slicechunktosize(revlog, [0, 1], 3))
187 >>> list(_slicechunktosize(revlog, [0, 1], 3))
188 [[0], [1]]
188 [[0], [1]]
189 >>> list(_slicechunktosize(revlog, [1, 3], 3))
189 >>> list(_slicechunktosize(revlog, [1, 3], 3))
190 [[1], [3]]
190 [[1], [3]]
191 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
191 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
192 [[1, 2], [3]]
192 [[1, 2], [3]]
193 >>> list(_slicechunktosize(revlog, [3, 5], 3))
193 >>> list(_slicechunktosize(revlog, [3, 5], 3))
194 [[3], [5]]
194 [[3], [5]]
195 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
195 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
196 [[3], [5]]
196 [[3], [5]]
197 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
197 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
198 [[5], [6, 7, 8]]
198 [[5], [6, 7, 8]]
199 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
199 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
200 [[0], [1, 2], [3], [5], [6, 7, 8]]
200 [[0], [1, 2], [3], [5], [6, 7, 8]]
201
201
202 Case with too large individual chunk (must return valid chunk)
202 Case with too large individual chunk (must return valid chunk)
203 >>> list(_slicechunktosize(revlog, [0, 1], 2))
203 >>> list(_slicechunktosize(revlog, [0, 1], 2))
204 [[0], [1]]
204 [[0], [1]]
205 >>> list(_slicechunktosize(revlog, [1, 3], 1))
205 >>> list(_slicechunktosize(revlog, [1, 3], 1))
206 [[1], [3]]
206 [[1], [3]]
207 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
207 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
208 [[3], [5]]
208 [[3], [5]]
209
209
210 == No Snapshot cases ==
210 == No Snapshot cases ==
211 >>> revlog = _testrevlog(data)
211 >>> revlog = _testrevlog(data)
212
212
213 Cases where chunk is already small enough
213 Cases where chunk is already small enough
214 >>> list(_slicechunktosize(revlog, [0], 3))
214 >>> list(_slicechunktosize(revlog, [0], 3))
215 [[0]]
215 [[0]]
216 >>> list(_slicechunktosize(revlog, [6, 7], 3))
216 >>> list(_slicechunktosize(revlog, [6, 7], 3))
217 [[6, 7]]
217 [[6, 7]]
218 >>> list(_slicechunktosize(revlog, [0], None))
218 >>> list(_slicechunktosize(revlog, [0], None))
219 [[0]]
219 [[0]]
220 >>> list(_slicechunktosize(revlog, [6, 7], None))
220 >>> list(_slicechunktosize(revlog, [6, 7], None))
221 [[6, 7]]
221 [[6, 7]]
222
222
223 cases where we need actual slicing
223 cases where we need actual slicing
224 >>> list(_slicechunktosize(revlog, [0, 1], 3))
224 >>> list(_slicechunktosize(revlog, [0, 1], 3))
225 [[0], [1]]
225 [[0], [1]]
226 >>> list(_slicechunktosize(revlog, [1, 3], 3))
226 >>> list(_slicechunktosize(revlog, [1, 3], 3))
227 [[1], [3]]
227 [[1], [3]]
228 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
228 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
229 [[1], [2, 3]]
229 [[1], [2, 3]]
230 >>> list(_slicechunktosize(revlog, [3, 5], 3))
230 >>> list(_slicechunktosize(revlog, [3, 5], 3))
231 [[3], [5]]
231 [[3], [5]]
232 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
232 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
233 [[3], [4, 5]]
233 [[3], [4, 5]]
234 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
234 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
235 [[5], [6, 7, 8]]
235 [[5], [6, 7, 8]]
236 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
236 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
237 [[0], [1, 2], [3], [5], [6, 7, 8]]
237 [[0], [1, 2], [3], [5], [6, 7, 8]]
238
238
239 Case with too large individual chunk (must return valid chunk)
239 Case with too large individual chunk (must return valid chunk)
240 >>> list(_slicechunktosize(revlog, [0, 1], 2))
240 >>> list(_slicechunktosize(revlog, [0, 1], 2))
241 [[0], [1]]
241 [[0], [1]]
242 >>> list(_slicechunktosize(revlog, [1, 3], 1))
242 >>> list(_slicechunktosize(revlog, [1, 3], 1))
243 [[1], [3]]
243 [[1], [3]]
244 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
244 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
245 [[3], [5]]
245 [[3], [5]]
246
246
247 == mixed case ==
247 == mixed case ==
248 >>> revlog = _testrevlog(data, snapshot=[0, 1, 2])
248 >>> revlog = _testrevlog(data, snapshot=[0, 1, 2])
249 >>> list(_slicechunktosize(revlog, list(range(9)), 5))
249 >>> list(_slicechunktosize(revlog, list(range(9)), 5))
250 [[0, 1], [2], [3, 4, 5], [6, 7, 8]]
250 [[0, 1], [2], [3, 4, 5], [6, 7, 8]]
251 """
251 """
252 assert targetsize is None or 0 <= targetsize
252 assert targetsize is None or 0 <= targetsize
253 startdata = revlog.start(revs[0])
253 startdata = revlog.start(revs[0])
254 enddata = revlog.end(revs[-1])
254 enddata = revlog.end(revs[-1])
255 fullspan = enddata - startdata
255 fullspan = enddata - startdata
256 if targetsize is None or fullspan <= targetsize:
256 if targetsize is None or fullspan <= targetsize:
257 yield revs
257 yield revs
258 return
258 return
259
259
260 startrevidx = 0
260 startrevidx = 0
261 endrevidx = 1
261 endrevidx = 1
262 iterrevs = enumerate(revs)
262 iterrevs = enumerate(revs)
263 next(iterrevs) # skip first rev.
263 next(iterrevs) # skip first rev.
264 # first step: get snapshots out of the way
264 # first step: get snapshots out of the way
265 for idx, r in iterrevs:
265 for idx, r in iterrevs:
266 span = revlog.end(r) - startdata
266 span = revlog.end(r) - startdata
267 snapshot = revlog.issnapshot(r)
267 snapshot = revlog.issnapshot(r)
268 if span <= targetsize and snapshot:
268 if span <= targetsize and snapshot:
269 endrevidx = idx + 1
269 endrevidx = idx + 1
270 else:
270 else:
271 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
271 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
272 if chunk:
272 if chunk:
273 yield chunk
273 yield chunk
274 startrevidx = idx
274 startrevidx = idx
275 startdata = revlog.start(r)
275 startdata = revlog.start(r)
276 endrevidx = idx + 1
276 endrevidx = idx + 1
277 if not snapshot:
277 if not snapshot:
278 break
278 break
279
279
280 # for the others, we use binary slicing to quickly converge toward valid
280 # for the others, we use binary slicing to quickly converge toward valid
281 # chunks (otherwise, we might end up looking for start/end of many
281 # chunks (otherwise, we might end up looking for start/end of many
282 # revisions). This logic is not looking for the perfect slicing point, it
282 # revisions). This logic is not looking for the perfect slicing point, it
283 # focuses on quickly converging toward valid chunks.
283 # focuses on quickly converging toward valid chunks.
284 nbitem = len(revs)
284 nbitem = len(revs)
285 while (enddata - startdata) > targetsize:
285 while (enddata - startdata) > targetsize:
286 endrevidx = nbitem
286 endrevidx = nbitem
287 if nbitem - startrevidx <= 1:
287 if nbitem - startrevidx <= 1:
288 break # protect against individual chunk larger than limit
288 break # protect against individual chunk larger than limit
289 localenddata = revlog.end(revs[endrevidx - 1])
289 localenddata = revlog.end(revs[endrevidx - 1])
290 span = localenddata - startdata
290 span = localenddata - startdata
291 while span > targetsize:
291 while span > targetsize:
292 if endrevidx - startrevidx <= 1:
292 if endrevidx - startrevidx <= 1:
293 break # protect against individual chunk larger than limit
293 break # protect against individual chunk larger than limit
294 endrevidx -= (endrevidx - startrevidx) // 2
294 endrevidx -= (endrevidx - startrevidx) // 2
295 localenddata = revlog.end(revs[endrevidx - 1])
295 localenddata = revlog.end(revs[endrevidx - 1])
296 span = localenddata - startdata
296 span = localenddata - startdata
297 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
297 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
298 if chunk:
298 if chunk:
299 yield chunk
299 yield chunk
300 startrevidx = endrevidx
300 startrevidx = endrevidx
301 startdata = revlog.start(revs[startrevidx])
301 startdata = revlog.start(revs[startrevidx])
302
302
303 chunk = _trimchunk(revlog, revs, startrevidx)
303 chunk = _trimchunk(revlog, revs, startrevidx)
304 if chunk:
304 if chunk:
305 yield chunk
305 yield chunk
306
306
307
307
308 def _slicechunktodensity(revlog, revs, targetdensity=0.5, mingapsize=0):
308 def _slicechunktodensity(revlog, revs, targetdensity=0.5, mingapsize=0):
309 """slice revs to reduce the amount of unrelated data to be read from disk.
309 """slice revs to reduce the amount of unrelated data to be read from disk.
310
310
311 ``revs`` is sliced into groups that should be read in one time.
311 ``revs`` is sliced into groups that should be read in one time.
312 Assume that revs are sorted.
312 Assume that revs are sorted.
313
313
314 The initial chunk is sliced until the overall density (payload/chunks-span
314 The initial chunk is sliced until the overall density (payload/chunks-span
315 ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
315 ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
316 skipped.
316 skipped.
317
317
318 >>> revlog = _testrevlog([
318 >>> revlog = _testrevlog([
319 ... 5, #00 (5)
319 ... 5, #00 (5)
320 ... 10, #01 (5)
320 ... 10, #01 (5)
321 ... 12, #02 (2)
321 ... 12, #02 (2)
322 ... 12, #03 (empty)
322 ... 12, #03 (empty)
323 ... 27, #04 (15)
323 ... 27, #04 (15)
324 ... 31, #05 (4)
324 ... 31, #05 (4)
325 ... 31, #06 (empty)
325 ... 31, #06 (empty)
326 ... 42, #07 (11)
326 ... 42, #07 (11)
327 ... 47, #08 (5)
327 ... 47, #08 (5)
328 ... 47, #09 (empty)
328 ... 47, #09 (empty)
329 ... 48, #10 (1)
329 ... 48, #10 (1)
330 ... 51, #11 (3)
330 ... 51, #11 (3)
331 ... 74, #12 (23)
331 ... 74, #12 (23)
332 ... 85, #13 (11)
332 ... 85, #13 (11)
333 ... 86, #14 (1)
333 ... 86, #14 (1)
334 ... 91, #15 (5)
334 ... 91, #15 (5)
335 ... ])
335 ... ])
336
336
337 >>> list(_slicechunktodensity(revlog, list(range(16))))
337 >>> list(_slicechunktodensity(revlog, list(range(16))))
338 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
338 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
339 >>> list(_slicechunktodensity(revlog, [0, 15]))
339 >>> list(_slicechunktodensity(revlog, [0, 15]))
340 [[0], [15]]
340 [[0], [15]]
341 >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
341 >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
342 [[0], [11], [15]]
342 [[0], [11], [15]]
343 >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
343 >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
344 [[0], [11, 13, 15]]
344 [[0], [11, 13, 15]]
345 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
345 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
346 [[1, 2], [5, 8, 10, 11], [14]]
346 [[1, 2], [5, 8, 10, 11], [14]]
347 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
347 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
348 ... mingapsize=20))
348 ... mingapsize=20))
349 [[1, 2, 3, 5, 8, 10, 11], [14]]
349 [[1, 2, 3, 5, 8, 10, 11], [14]]
350 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
350 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
351 ... targetdensity=0.95))
351 ... targetdensity=0.95))
352 [[1, 2], [5], [8, 10, 11], [14]]
352 [[1, 2], [5], [8, 10, 11], [14]]
353 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
353 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
354 ... targetdensity=0.95, mingapsize=12))
354 ... targetdensity=0.95, mingapsize=12))
355 [[1, 2], [5, 8, 10, 11], [14]]
355 [[1, 2], [5, 8, 10, 11], [14]]
356 """
356 """
357 start = revlog.start
357 start = revlog.start
358 length = revlog.length
358 length = revlog.length
359
359
360 if len(revs) <= 1:
360 if len(revs) <= 1:
361 yield revs
361 yield revs
362 return
362 return
363
363
364 deltachainspan = segmentspan(revlog, revs)
364 deltachainspan = segmentspan(revlog, revs)
365
365
366 if deltachainspan < mingapsize:
366 if deltachainspan < mingapsize:
367 yield revs
367 yield revs
368 return
368 return
369
369
370 readdata = deltachainspan
370 readdata = deltachainspan
371 chainpayload = sum(length(r) for r in revs)
371 chainpayload = sum(length(r) for r in revs)
372
372
373 if deltachainspan:
373 if deltachainspan:
374 density = chainpayload / float(deltachainspan)
374 density = chainpayload / float(deltachainspan)
375 else:
375 else:
376 density = 1.0
376 density = 1.0
377
377
378 if density >= targetdensity:
378 if density >= targetdensity:
379 yield revs
379 yield revs
380 return
380 return
381
381
382 # Store the gaps in a heap to have them sorted by decreasing size
382 # Store the gaps in a heap to have them sorted by decreasing size
383 gaps = []
383 gaps = []
384 prevend = None
384 prevend = None
385 for i, rev in enumerate(revs):
385 for i, rev in enumerate(revs):
386 revstart = start(rev)
386 revstart = start(rev)
387 revlen = length(rev)
387 revlen = length(rev)
388
388
389 # Skip empty revisions to form larger holes
389 # Skip empty revisions to form larger holes
390 if revlen == 0:
390 if revlen == 0:
391 continue
391 continue
392
392
393 if prevend is not None:
393 if prevend is not None:
394 gapsize = revstart - prevend
394 gapsize = revstart - prevend
395 # only consider holes that are large enough
395 # only consider holes that are large enough
396 if gapsize > mingapsize:
396 if gapsize > mingapsize:
397 gaps.append((gapsize, i))
397 gaps.append((gapsize, i))
398
398
399 prevend = revstart + revlen
399 prevend = revstart + revlen
400 # sort the gaps to pop them from largest to small
400 # sort the gaps to pop them from largest to small
401 gaps.sort()
401 gaps.sort()
402
402
403 # Collect the indices of the largest holes until the density is acceptable
403 # Collect the indices of the largest holes until the density is acceptable
404 selected = []
404 selected = []
405 while gaps and density < targetdensity:
405 while gaps and density < targetdensity:
406 gapsize, gapidx = gaps.pop()
406 gapsize, gapidx = gaps.pop()
407
407
408 selected.append(gapidx)
408 selected.append(gapidx)
409
409
410 # the gap sizes are stored as negatives to be sorted decreasingly
410 # the gap sizes are stored as negatives to be sorted decreasingly
411 # by the heap
411 # by the heap
412 readdata -= gapsize
412 readdata -= gapsize
413 if readdata > 0:
413 if readdata > 0:
414 density = chainpayload / float(readdata)
414 density = chainpayload / float(readdata)
415 else:
415 else:
416 density = 1.0
416 density = 1.0
417 selected.sort()
417 selected.sort()
418
418
419 # Cut the revs at collected indices
419 # Cut the revs at collected indices
420 previdx = 0
420 previdx = 0
421 for idx in selected:
421 for idx in selected:
422
422
423 chunk = _trimchunk(revlog, revs, previdx, idx)
423 chunk = _trimchunk(revlog, revs, previdx, idx)
424 if chunk:
424 if chunk:
425 yield chunk
425 yield chunk
426
426
427 previdx = idx
427 previdx = idx
428
428
429 chunk = _trimchunk(revlog, revs, previdx)
429 chunk = _trimchunk(revlog, revs, previdx)
430 if chunk:
430 if chunk:
431 yield chunk
431 yield chunk
432
432
433
433
434 def _trimchunk(revlog, revs, startidx, endidx=None):
434 def _trimchunk(revlog, revs, startidx, endidx=None):
435 """returns revs[startidx:endidx] without empty trailing revs
435 """returns revs[startidx:endidx] without empty trailing revs
436
436
437 Doctest Setup
437 Doctest Setup
438 >>> revlog = _testrevlog([
438 >>> revlog = _testrevlog([
439 ... 5, #0
439 ... 5, #0
440 ... 10, #1
440 ... 10, #1
441 ... 12, #2
441 ... 12, #2
442 ... 12, #3 (empty)
442 ... 12, #3 (empty)
443 ... 17, #4
443 ... 17, #4
444 ... 21, #5
444 ... 21, #5
445 ... 21, #6 (empty)
445 ... 21, #6 (empty)
446 ... ])
446 ... ])
447
447
448 Contiguous cases:
448 Contiguous cases:
449 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
449 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
450 [0, 1, 2, 3, 4, 5]
450 [0, 1, 2, 3, 4, 5]
451 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
451 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
452 [0, 1, 2, 3, 4]
452 [0, 1, 2, 3, 4]
453 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
453 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
454 [0, 1, 2]
454 [0, 1, 2]
455 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
455 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
456 [2]
456 [2]
457 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
457 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
458 [3, 4, 5]
458 [3, 4, 5]
459 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
459 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
460 [3, 4]
460 [3, 4]
461
461
462 Discontiguous cases:
462 Discontiguous cases:
463 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
463 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
464 [1, 3, 5]
464 [1, 3, 5]
465 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
465 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
466 [1]
466 [1]
467 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
467 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
468 [3, 5]
468 [3, 5]
469 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
469 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
470 [3, 5]
470 [3, 5]
471 """
471 """
472 length = revlog.length
472 length = revlog.length
473
473
474 if endidx is None:
474 if endidx is None:
475 endidx = len(revs)
475 endidx = len(revs)
476
476
477 # If we have a non-emtpy delta candidate, there are nothing to trim
477 # If we have a non-emtpy delta candidate, there are nothing to trim
478 if revs[endidx - 1] < len(revlog):
478 if revs[endidx - 1] < len(revlog):
479 # Trim empty revs at the end, except the very first revision of a chain
479 # Trim empty revs at the end, except the very first revision of a chain
480 while (
480 while (
481 endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0
481 endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0
482 ):
482 ):
483 endidx -= 1
483 endidx -= 1
484
484
485 return revs[startidx:endidx]
485 return revs[startidx:endidx]
486
486
487
487
488 def segmentspan(revlog, revs):
488 def segmentspan(revlog, revs):
489 """Get the byte span of a segment of revisions
489 """Get the byte span of a segment of revisions
490
490
491 revs is a sorted array of revision numbers
491 revs is a sorted array of revision numbers
492
492
493 >>> revlog = _testrevlog([
493 >>> revlog = _testrevlog([
494 ... 5, #0
494 ... 5, #0
495 ... 10, #1
495 ... 10, #1
496 ... 12, #2
496 ... 12, #2
497 ... 12, #3 (empty)
497 ... 12, #3 (empty)
498 ... 17, #4
498 ... 17, #4
499 ... ])
499 ... ])
500
500
501 >>> segmentspan(revlog, [0, 1, 2, 3, 4])
501 >>> segmentspan(revlog, [0, 1, 2, 3, 4])
502 17
502 17
503 >>> segmentspan(revlog, [0, 4])
503 >>> segmentspan(revlog, [0, 4])
504 17
504 17
505 >>> segmentspan(revlog, [3, 4])
505 >>> segmentspan(revlog, [3, 4])
506 5
506 5
507 >>> segmentspan(revlog, [1, 2, 3,])
507 >>> segmentspan(revlog, [1, 2, 3,])
508 7
508 7
509 >>> segmentspan(revlog, [1, 3])
509 >>> segmentspan(revlog, [1, 3])
510 7
510 7
511 """
511 """
512 if not revs:
512 if not revs:
513 return 0
513 return 0
514 end = revlog.end(revs[-1])
514 end = revlog.end(revs[-1])
515 return end - revlog.start(revs[0])
515 return end - revlog.start(revs[0])
516
516
517
517
518 def _textfromdelta(fh, revlog, baserev, delta, p1, p2, flags, expectednode):
518 def _textfromdelta(fh, revlog, baserev, delta, p1, p2, flags, expectednode):
519 """build full text from a (base, delta) pair and other metadata"""
519 """build full text from a (base, delta) pair and other metadata"""
520 # special case deltas which replace entire base; no need to decode
520 # special case deltas which replace entire base; no need to decode
521 # base revision. this neatly avoids censored bases, which throw when
521 # base revision. this neatly avoids censored bases, which throw when
522 # they're decoded.
522 # they're decoded.
523 hlen = struct.calcsize(b">lll")
523 hlen = struct.calcsize(b">lll")
524 if delta[:hlen] == mdiff.replacediffheader(
524 if delta[:hlen] == mdiff.replacediffheader(
525 revlog.rawsize(baserev), len(delta) - hlen
525 revlog.rawsize(baserev), len(delta) - hlen
526 ):
526 ):
527 fulltext = delta[hlen:]
527 fulltext = delta[hlen:]
528 else:
528 else:
529 # deltabase is rawtext before changed by flag processors, which is
529 # deltabase is rawtext before changed by flag processors, which is
530 # equivalent to non-raw text
530 # equivalent to non-raw text
531 basetext = revlog.revision(baserev, _df=fh)
531 basetext = revlog.revision(baserev, _df=fh)
532 fulltext = mdiff.patch(basetext, delta)
532 fulltext = mdiff.patch(basetext, delta)
533
533
534 try:
534 try:
535 validatehash = flagutil.processflagsraw(revlog, fulltext, flags)
535 validatehash = flagutil.processflagsraw(revlog, fulltext, flags)
536 if validatehash:
536 if validatehash:
537 revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
537 revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
538 if flags & REVIDX_ISCENSORED:
538 if flags & REVIDX_ISCENSORED:
539 raise error.StorageError(
539 raise error.StorageError(
540 _(b'node %s is not censored') % expectednode
540 _(b'node %s is not censored') % expectednode
541 )
541 )
542 except error.CensoredNodeError:
542 except error.CensoredNodeError:
543 # must pass the censored index flag to add censored revisions
543 # must pass the censored index flag to add censored revisions
544 if not flags & REVIDX_ISCENSORED:
544 if not flags & REVIDX_ISCENSORED:
545 raise
545 raise
546 return fulltext
546 return fulltext
547
547
548
548
549 @attr.s(slots=True, frozen=True)
549 @attr.s(slots=True, frozen=True)
550 class _deltainfo:
550 class _deltainfo:
551 distance = attr.ib()
551 distance = attr.ib()
552 deltalen = attr.ib()
552 deltalen = attr.ib()
553 data = attr.ib()
553 data = attr.ib()
554 base = attr.ib()
554 base = attr.ib()
555 chainbase = attr.ib()
555 chainbase = attr.ib()
556 chainlen = attr.ib()
556 chainlen = attr.ib()
557 compresseddeltalen = attr.ib()
557 compresseddeltalen = attr.ib()
558 snapshotdepth = attr.ib()
558 snapshotdepth = attr.ib()
559
559
560
560
561 def drop_u_compression(delta):
561 def drop_u_compression(delta):
562 """turn into a "u" (no-compression) into no-compression without header
562 """turn into a "u" (no-compression) into no-compression without header
563
563
564 This is useful for revlog format that has better compression method.
564 This is useful for revlog format that has better compression method.
565 """
565 """
566 assert delta.data[0] == b'u', delta.data[0]
566 assert delta.data[0] == b'u', delta.data[0]
567 return _deltainfo(
567 return _deltainfo(
568 delta.distance,
568 delta.distance,
569 delta.deltalen - 1,
569 delta.deltalen - 1,
570 (b'', delta.data[1]),
570 (b'', delta.data[1]),
571 delta.base,
571 delta.base,
572 delta.chainbase,
572 delta.chainbase,
573 delta.chainlen,
573 delta.chainlen,
574 delta.compresseddeltalen,
574 delta.compresseddeltalen,
575 delta.snapshotdepth,
575 delta.snapshotdepth,
576 )
576 )
577
577
578
578
579 def isgooddeltainfo(revlog, deltainfo, revinfo):
579 def isgooddeltainfo(revlog, deltainfo, revinfo):
580 """Returns True if the given delta is good. Good means that it is within
580 """Returns True if the given delta is good. Good means that it is within
581 the disk span, disk size, and chain length bounds that we know to be
581 the disk span, disk size, and chain length bounds that we know to be
582 performant."""
582 performant."""
583 if deltainfo is None:
583 if deltainfo is None:
584 return False
584 return False
585
585
586 # - 'deltainfo.distance' is the distance from the base revision --
586 # - 'deltainfo.distance' is the distance from the base revision --
587 # bounding it limits the amount of I/O we need to do.
587 # bounding it limits the amount of I/O we need to do.
588 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
588 # - 'deltainfo.compresseddeltalen' is the sum of the total size of
589 # deltas we need to apply -- bounding it limits the amount of CPU
589 # deltas we need to apply -- bounding it limits the amount of CPU
590 # we consume.
590 # we consume.
591
591
592 textlen = revinfo.textlen
592 textlen = revinfo.textlen
593 defaultmax = textlen * 4
593 defaultmax = textlen * 4
594 maxdist = revlog._maxdeltachainspan
594 maxdist = revlog._maxdeltachainspan
595 if not maxdist:
595 if not maxdist:
596 maxdist = deltainfo.distance # ensure the conditional pass
596 maxdist = deltainfo.distance # ensure the conditional pass
597 maxdist = max(maxdist, defaultmax)
597 maxdist = max(maxdist, defaultmax)
598
598
599 # Bad delta from read span:
599 # Bad delta from read span:
600 #
600 #
601 # If the span of data read is larger than the maximum allowed.
601 # If the span of data read is larger than the maximum allowed.
602 #
602 #
603 # In the sparse-revlog case, we rely on the associated "sparse reading"
603 # In the sparse-revlog case, we rely on the associated "sparse reading"
604 # to avoid issue related to the span of data. In theory, it would be
604 # to avoid issue related to the span of data. In theory, it would be
605 # possible to build pathological revlog where delta pattern would lead
605 # possible to build pathological revlog where delta pattern would lead
606 # to too many reads. However, they do not happen in practice at all. So
606 # to too many reads. However, they do not happen in practice at all. So
607 # we skip the span check entirely.
607 # we skip the span check entirely.
608 if not revlog._sparserevlog and maxdist < deltainfo.distance:
608 if not revlog._sparserevlog and maxdist < deltainfo.distance:
609 return False
609 return False
610
610
611 # Bad delta from new delta size:
611 # Bad delta from new delta size:
612 #
612 #
613 # If the delta size is larger than the target text, storing the
613 # If the delta size is larger than the target text, storing the
614 # delta will be inefficient.
614 # delta will be inefficient.
615 if textlen < deltainfo.deltalen:
615 if textlen < deltainfo.deltalen:
616 return False
616 return False
617
617
618 # Bad delta from cumulated payload size:
618 # Bad delta from cumulated payload size:
619 #
619 #
620 # If the sum of delta get larger than K * target text length.
620 # If the sum of delta get larger than K * target text length.
621 if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
621 if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
622 return False
622 return False
623
623
624 # Bad delta from chain length:
624 # Bad delta from chain length:
625 #
625 #
626 # If the number of delta in the chain gets too high.
626 # If the number of delta in the chain gets too high.
627 if revlog._maxchainlen and revlog._maxchainlen < deltainfo.chainlen:
627 if revlog._maxchainlen and revlog._maxchainlen < deltainfo.chainlen:
628 return False
628 return False
629
629
630 # bad delta from intermediate snapshot size limit
630 # bad delta from intermediate snapshot size limit
631 #
631 #
632 # If an intermediate snapshot size is higher than the limit. The
632 # If an intermediate snapshot size is higher than the limit. The
633 # limit exist to prevent endless chain of intermediate delta to be
633 # limit exist to prevent endless chain of intermediate delta to be
634 # created.
634 # created.
635 if (
635 if (
636 deltainfo.snapshotdepth is not None
636 deltainfo.snapshotdepth is not None
637 and (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen
637 and (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen
638 ):
638 ):
639 return False
639 return False
640
640
641 # bad delta if new intermediate snapshot is larger than the previous
641 # bad delta if new intermediate snapshot is larger than the previous
642 # snapshot
642 # snapshot
643 if (
643 if (
644 deltainfo.snapshotdepth
644 deltainfo.snapshotdepth
645 and revlog.length(deltainfo.base) < deltainfo.deltalen
645 and revlog.length(deltainfo.base) < deltainfo.deltalen
646 ):
646 ):
647 return False
647 return False
648
648
649 return True
649 return True
650
650
651
651
652 # If a revision's full text is that much bigger than a base candidate full
652 # If a revision's full text is that much bigger than a base candidate full
653 # text's, it is very unlikely that it will produce a valid delta. We no longer
653 # text's, it is very unlikely that it will produce a valid delta. We no longer
654 # consider these candidates.
654 # consider these candidates.
655 LIMIT_BASE2TEXT = 500
655 LIMIT_BASE2TEXT = 500
656
656
657
657
658 def _candidategroups(
658 def _candidategroups(
659 revlog,
659 revlog,
660 textlen,
660 textlen,
661 p1,
661 p1,
662 p2,
662 p2,
663 cachedelta,
663 cachedelta,
664 excluded_bases=None,
664 excluded_bases=None,
665 target_rev=None,
665 target_rev=None,
666 ):
666 ):
667 """Provides group of revision to be tested as delta base
667 """Provides group of revision to be tested as delta base
668
668
669 This top level function focus on emitting groups with unique and worthwhile
669 This top level function focus on emitting groups with unique and worthwhile
670 content. See _raw_candidate_groups for details about the group order.
670 content. See _raw_candidate_groups for details about the group order.
671 """
671 """
672 # should we try to build a delta?
672 # should we try to build a delta?
673 if not (len(revlog) and revlog._storedeltachains):
673 if not (len(revlog) and revlog._storedeltachains):
674 yield None
674 yield None
675 return
675 return
676
676
677 deltalength = revlog.length
677 deltalength = revlog.length
678 deltaparent = revlog.deltaparent
678 deltaparent = revlog.deltaparent
679 sparse = revlog._sparserevlog
679 sparse = revlog._sparserevlog
680 good = None
680 good = None
681
681
682 deltas_limit = textlen * LIMIT_DELTA2TEXT
682 deltas_limit = textlen * LIMIT_DELTA2TEXT
683
683
684 tested = {nullrev}
684 tested = {nullrev}
685 candidates = _refinedgroups(
685 candidates = _refinedgroups(
686 revlog,
686 revlog,
687 p1,
687 p1,
688 p2,
688 p2,
689 cachedelta,
689 cachedelta,
690 )
690 )
691 while True:
691 while True:
692 temptative = candidates.send(good)
692 temptative = candidates.send(good)
693 if temptative is None:
693 if temptative is None:
694 break
694 break
695 group = []
695 group = []
696 for rev in temptative:
696 for rev in temptative:
697 # skip over empty delta (no need to include them in a chain)
697 # skip over empty delta (no need to include them in a chain)
698 while revlog._generaldelta and not (
698 while revlog._generaldelta and not (
699 rev == nullrev or rev in tested or deltalength(rev)
699 rev == nullrev or rev in tested or deltalength(rev)
700 ):
700 ):
701 tested.add(rev)
701 tested.add(rev)
702 rev = deltaparent(rev)
702 rev = deltaparent(rev)
703 # no need to try a delta against nullrev, this will be done as a
703 # no need to try a delta against nullrev, this will be done as a
704 # last resort.
704 # last resort.
705 if rev == nullrev:
705 if rev == nullrev:
706 continue
706 continue
707 # filter out revision we tested already
707 # filter out revision we tested already
708 if rev in tested:
708 if rev in tested:
709 continue
709 continue
710 tested.add(rev)
710 tested.add(rev)
711 # an higher authority deamed the base unworthy (e.g. censored)
711 # an higher authority deamed the base unworthy (e.g. censored)
712 if excluded_bases is not None and rev in excluded_bases:
712 if excluded_bases is not None and rev in excluded_bases:
713 continue
713 continue
714 # We are in some recomputation cases and that rev is too high in
714 # We are in some recomputation cases and that rev is too high in
715 # the revlog
715 # the revlog
716 if target_rev is not None and rev >= target_rev:
716 if target_rev is not None and rev >= target_rev:
717 continue
717 continue
718 # filter out delta base that will never produce good delta
718 # filter out delta base that will never produce good delta
719 if deltas_limit < revlog.length(rev):
719 if deltas_limit < revlog.length(rev):
720 continue
720 continue
721 if sparse and revlog.rawsize(rev) < (textlen // LIMIT_BASE2TEXT):
721 if sparse and revlog.rawsize(rev) < (textlen // LIMIT_BASE2TEXT):
722 continue
722 continue
723 # no delta for rawtext-changing revs (see "candelta" for why)
723 # no delta for rawtext-changing revs (see "candelta" for why)
724 if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
724 if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
725 continue
725 continue
726
726
727 # If we reach here, we are about to build and test a delta.
727 # If we reach here, we are about to build and test a delta.
728 # The delta building process will compute the chaininfo in all
728 # The delta building process will compute the chaininfo in all
729 # case, since that computation is cached, it is fine to access it
729 # case, since that computation is cached, it is fine to access it
730 # here too.
730 # here too.
731 chainlen, chainsize = revlog._chaininfo(rev)
731 chainlen, chainsize = revlog._chaininfo(rev)
732 # if chain will be too long, skip base
732 # if chain will be too long, skip base
733 if revlog._maxchainlen and chainlen >= revlog._maxchainlen:
733 if revlog._maxchainlen and chainlen >= revlog._maxchainlen:
734 continue
734 continue
735 # if chain already have too much data, skip base
735 # if chain already have too much data, skip base
736 if deltas_limit < chainsize:
736 if deltas_limit < chainsize:
737 continue
737 continue
738 if sparse and revlog.upperboundcomp is not None:
738 if sparse and revlog.upperboundcomp is not None:
739 maxcomp = revlog.upperboundcomp
739 maxcomp = revlog.upperboundcomp
740 basenotsnap = (p1, p2, nullrev)
740 basenotsnap = (p1, p2, nullrev)
741 if rev not in basenotsnap and revlog.issnapshot(rev):
741 if rev not in basenotsnap and revlog.issnapshot(rev):
742 snapshotdepth = revlog.snapshotdepth(rev)
742 snapshotdepth = revlog.snapshotdepth(rev)
743 # If text is significantly larger than the base, we can
743 # If text is significantly larger than the base, we can
744 # expect the resulting delta to be proportional to the size
744 # expect the resulting delta to be proportional to the size
745 # difference
745 # difference
746 revsize = revlog.rawsize(rev)
746 revsize = revlog.rawsize(rev)
747 rawsizedistance = max(textlen - revsize, 0)
747 rawsizedistance = max(textlen - revsize, 0)
748 # use an estimate of the compression upper bound.
748 # use an estimate of the compression upper bound.
749 lowestrealisticdeltalen = rawsizedistance // maxcomp
749 lowestrealisticdeltalen = rawsizedistance // maxcomp
750
750
751 # check the absolute constraint on the delta size
751 # check the absolute constraint on the delta size
752 snapshotlimit = textlen >> snapshotdepth
752 snapshotlimit = textlen >> snapshotdepth
753 if snapshotlimit < lowestrealisticdeltalen:
753 if snapshotlimit < lowestrealisticdeltalen:
754 # delta lower bound is larger than accepted upper bound
754 # delta lower bound is larger than accepted upper bound
755 continue
755 continue
756
756
757 # check the relative constraint on the delta size
757 # check the relative constraint on the delta size
758 revlength = revlog.length(rev)
758 revlength = revlog.length(rev)
759 if revlength < lowestrealisticdeltalen:
759 if revlength < lowestrealisticdeltalen:
760 # delta probable lower bound is larger than target base
760 # delta probable lower bound is larger than target base
761 continue
761 continue
762
762
763 group.append(rev)
763 group.append(rev)
764 if group:
764 if group:
765 # XXX: in the sparse revlog case, group can become large,
765 # XXX: in the sparse revlog case, group can become large,
766 # impacting performances. Some bounding or slicing mecanism
766 # impacting performances. Some bounding or slicing mecanism
767 # would help to reduce this impact.
767 # would help to reduce this impact.
768 good = yield tuple(group)
768 good = yield tuple(group)
769 yield None
769 yield None
770
770
771
771
772 def _findsnapshots(revlog, cache, start_rev):
772 def _findsnapshots(revlog, cache, start_rev):
773 """find snapshot from start_rev to tip"""
773 """find snapshot from start_rev to tip"""
774 if util.safehasattr(revlog.index, b'findsnapshots'):
774 if util.safehasattr(revlog.index, b'findsnapshots'):
775 revlog.index.findsnapshots(cache, start_rev)
775 revlog.index.findsnapshots(cache, start_rev)
776 else:
776 else:
777 deltaparent = revlog.deltaparent
777 deltaparent = revlog.deltaparent
778 issnapshot = revlog.issnapshot
778 issnapshot = revlog.issnapshot
779 for rev in revlog.revs(start_rev):
779 for rev in revlog.revs(start_rev):
780 if issnapshot(rev):
780 if issnapshot(rev):
781 cache[deltaparent(rev)].append(rev)
781 cache[deltaparent(rev)].append(rev)
782
782
783
783
784 def _refinedgroups(revlog, p1, p2, cachedelta):
784 def _refinedgroups(revlog, p1, p2, cachedelta):
785 good = None
785 good = None
786 # First we try to reuse a the delta contained in the bundle.
786 # First we try to reuse a the delta contained in the bundle.
787 # (or from the source revlog)
787 # (or from the source revlog)
788 #
788 #
789 # This logic only applies to general delta repositories and can be disabled
789 # This logic only applies to general delta repositories and can be disabled
790 # through configuration. Disabling reuse source delta is useful when
790 # through configuration. Disabling reuse source delta is useful when
791 # we want to make sure we recomputed "optimal" deltas.
791 # we want to make sure we recomputed "optimal" deltas.
792 debug_info = None
792 debug_info = None
793 if cachedelta and revlog._generaldelta and revlog._lazydeltabase:
793 if cachedelta and revlog._generaldelta and revlog._lazydeltabase:
794 # Assume what we received from the server is a good choice
794 # Assume what we received from the server is a good choice
795 # build delta will reuse the cache
795 # build delta will reuse the cache
796 if debug_info is not None:
796 if debug_info is not None:
797 debug_info['cached-delta.tested'] += 1
797 debug_info['cached-delta.tested'] += 1
798 good = yield (cachedelta[0],)
798 good = yield (cachedelta[0],)
799 if good is not None:
799 if good is not None:
800 if debug_info is not None:
800 if debug_info is not None:
801 debug_info['cached-delta.accepted'] += 1
801 debug_info['cached-delta.accepted'] += 1
802 yield None
802 yield None
803 return
803 return
804 # XXX cache me higher
804 # XXX cache me higher
805 snapshots = collections.defaultdict(list)
805 snapshots = collections.defaultdict(list)
806 groups = _rawgroups(
806 groups = _rawgroups(
807 revlog,
807 revlog,
808 p1,
808 p1,
809 p2,
809 p2,
810 cachedelta,
810 cachedelta,
811 snapshots,
811 snapshots,
812 )
812 )
813 for candidates in groups:
813 for candidates in groups:
814 good = yield candidates
814 good = yield candidates
815 if good is not None:
815 if good is not None:
816 break
816 break
817
817
818 # If sparse revlog is enabled, we can try to refine the available deltas
818 # If sparse revlog is enabled, we can try to refine the available deltas
819 if not revlog._sparserevlog:
819 if not revlog._sparserevlog:
820 yield None
820 yield None
821 return
821 return
822
822
823 # if we have a refinable value, try to refine it
823 # if we have a refinable value, try to refine it
824 if good is not None and good not in (p1, p2) and revlog.issnapshot(good):
824 if good is not None and good not in (p1, p2) and revlog.issnapshot(good):
825 # refine snapshot down
825 # refine snapshot down
826 previous = None
826 previous = None
827 while previous != good:
827 while previous != good:
828 previous = good
828 previous = good
829 base = revlog.deltaparent(good)
829 base = revlog.deltaparent(good)
830 if base == nullrev:
830 if base == nullrev:
831 break
831 break
832 good = yield (base,)
832 good = yield (base,)
833 # refine snapshot up
833 # refine snapshot up
834 if not snapshots:
834 if not snapshots:
835 _findsnapshots(revlog, snapshots, good + 1)
835 _findsnapshots(revlog, snapshots, good + 1)
836 previous = None
836 previous = None
837 while good != previous:
837 while good != previous:
838 previous = good
838 previous = good
839 children = tuple(sorted(c for c in snapshots[good]))
839 children = tuple(sorted(c for c in snapshots[good]))
840 good = yield children
840 good = yield children
841
841
842 if debug_info is not None:
842 if debug_info is not None:
843 if good is None:
843 if good is None:
844 debug_info['no-solution'] += 1
844 debug_info['no-solution'] += 1
845
845
846 yield None
846 yield None
847
847
848
848
849 def _rawgroups(revlog, p1, p2, cachedelta, snapshots=None):
849 def _rawgroups(revlog, p1, p2, cachedelta, snapshots=None):
850 """Provides group of revision to be tested as delta base
850 """Provides group of revision to be tested as delta base
851
851
852 This lower level function focus on emitting delta theorically interresting
852 This lower level function focus on emitting delta theorically interresting
853 without looking it any practical details.
853 without looking it any practical details.
854
854
855 The group order aims at providing fast or small candidates first.
855 The group order aims at providing fast or small candidates first.
856 """
856 """
857 gdelta = revlog._generaldelta
857 gdelta = revlog._generaldelta
858 # gate sparse behind general-delta because of issue6056
858 # gate sparse behind general-delta because of issue6056
859 sparse = gdelta and revlog._sparserevlog
859 sparse = gdelta and revlog._sparserevlog
860 curr = len(revlog)
860 curr = len(revlog)
861 prev = curr - 1
861 prev = curr - 1
862 deltachain = lambda rev: revlog._deltachain(rev)[0]
862 deltachain = lambda rev: revlog._deltachain(rev)[0]
863
863
864 if gdelta:
864 if gdelta:
865 # exclude already lazy tested base if any
865 # exclude already lazy tested base if any
866 parents = [p for p in (p1, p2) if p != nullrev]
866 parents = [p for p in (p1, p2) if p != nullrev]
867
867
868 if not revlog._deltabothparents and len(parents) == 2:
868 if not revlog._deltabothparents and len(parents) == 2:
869 parents.sort()
869 parents.sort()
870 # To minimize the chance of having to build a fulltext,
870 # To minimize the chance of having to build a fulltext,
871 # pick first whichever parent is closest to us (max rev)
871 # pick first whichever parent is closest to us (max rev)
872 yield (parents[1],)
872 yield (parents[1],)
873 # then the other one (min rev) if the first did not fit
873 # then the other one (min rev) if the first did not fit
874 yield (parents[0],)
874 yield (parents[0],)
875 elif len(parents) > 0:
875 elif len(parents) > 0:
876 # Test all parents (1 or 2), and keep the best candidate
876 # Test all parents (1 or 2), and keep the best candidate
877 yield parents
877 yield parents
878
878
879 if sparse and parents:
879 if sparse and parents:
880 if snapshots is None:
880 if snapshots is None:
881 # map: base-rev: snapshot-rev
881 # map: base-rev: [snapshot-revs]
882 snapshots = collections.defaultdict(list)
882 snapshots = collections.defaultdict(list)
883 # See if we can use an existing snapshot in the parent chains to use as
883 # See if we can use an existing snapshot in the parent chains to use as
884 # a base for a new intermediate-snapshot
884 # a base for a new intermediate-snapshot
885 #
885 #
886 # search for snapshot in parents delta chain
886 # search for snapshot in parents delta chain
887 # map: snapshot-level: snapshot-rev
887 # map: snapshot-level: snapshot-rev
888 parents_snaps = collections.defaultdict(set)
888 parents_snaps = collections.defaultdict(set)
889 candidate_chains = [deltachain(p) for p in parents]
889 candidate_chains = [deltachain(p) for p in parents]
890 for chain in candidate_chains:
890 for chain in candidate_chains:
891 for idx, s in enumerate(chain):
891 for idx, s in enumerate(chain):
892 if not revlog.issnapshot(s):
892 if not revlog.issnapshot(s):
893 break
893 break
894 parents_snaps[idx].add(s)
894 parents_snaps[idx].add(s)
895 snapfloor = min(parents_snaps[0]) + 1
895 snapfloor = min(parents_snaps[0]) + 1
896 _findsnapshots(revlog, snapshots, snapfloor)
896 _findsnapshots(revlog, snapshots, snapfloor)
897 # search for the highest "unrelated" revision
897 # search for the highest "unrelated" revision
898 #
898 #
899 # Adding snapshots used by "unrelated" revision increase the odd we
899 # Adding snapshots used by "unrelated" revision increase the odd we
900 # reuse an independant, yet better snapshot chain.
900 # reuse an independant, yet better snapshot chain.
901 #
901 #
902 # XXX instead of building a set of revisions, we could lazily enumerate
902 # XXX instead of building a set of revisions, we could lazily enumerate
903 # over the chains. That would be more efficient, however we stick to
903 # over the chains. That would be more efficient, however we stick to
904 # simple code for now.
904 # simple code for now.
905 all_revs = set()
905 all_revs = set()
906 for chain in candidate_chains:
906 for chain in candidate_chains:
907 all_revs.update(chain)
907 all_revs.update(chain)
908 other = None
908 other = None
909 for r in revlog.revs(prev, snapfloor):
909 for r in revlog.revs(prev, snapfloor):
910 if r not in all_revs:
910 if r not in all_revs:
911 other = r
911 other = r
912 break
912 break
913 if other is not None:
913 if other is not None:
914 # To avoid unfair competition, we won't use unrelated intermediate
914 # To avoid unfair competition, we won't use unrelated intermediate
915 # snapshot that are deeper than the ones from the parent delta
915 # snapshot that are deeper than the ones from the parent delta
916 # chain.
916 # chain.
917 max_depth = max(parents_snaps.keys())
917 max_depth = max(parents_snaps.keys())
918 chain = deltachain(other)
918 chain = deltachain(other)
919 for idx, s in enumerate(chain):
919 for idx, s in enumerate(chain):
920 if s < snapfloor:
920 if s < snapfloor:
921 continue
921 continue
922 if max_depth < idx:
922 if max_depth < idx:
923 break
923 break
924 if not revlog.issnapshot(s):
924 if not revlog.issnapshot(s):
925 break
925 break
926 parents_snaps[idx].add(s)
926 parents_snaps[idx].add(s)
927 # Test them as possible intermediate snapshot base
927 # Test them as possible intermediate snapshot base
928 # We test them from highest to lowest level. High level one are more
928 # We test them from highest to lowest level. High level one are more
929 # likely to result in small delta
929 # likely to result in small delta
930 floor = None
930 floor = None
931 for idx, snaps in sorted(parents_snaps.items(), reverse=True):
931 for idx, snaps in sorted(parents_snaps.items(), reverse=True):
932 siblings = set()
932 siblings = set()
933 for s in snaps:
933 for s in snaps:
934 siblings.update(snapshots[s])
934 siblings.update(snapshots[s])
935 # Before considering making a new intermediate snapshot, we check
935 # Before considering making a new intermediate snapshot, we check
936 # if an existing snapshot, children of base we consider, would be
936 # if an existing snapshot, children of base we consider, would be
937 # suitable.
937 # suitable.
938 #
938 #
939 # It give a change to reuse a delta chain "unrelated" to the
939 # It give a change to reuse a delta chain "unrelated" to the
940 # current revision instead of starting our own. Without such
940 # current revision instead of starting our own. Without such
941 # re-use, topological branches would keep reopening new chains.
941 # re-use, topological branches would keep reopening new chains.
942 # Creating more and more snapshot as the repository grow.
942 # Creating more and more snapshot as the repository grow.
943
943
944 if floor is not None:
944 if floor is not None:
945 # We only do this for siblings created after the one in our
945 # We only do this for siblings created after the one in our
946 # parent's delta chain. Those created before has less chances
946 # parent's delta chain. Those created before has less chances
947 # to be valid base since our ancestors had to create a new
947 # to be valid base since our ancestors had to create a new
948 # snapshot.
948 # snapshot.
949 siblings = [r for r in siblings if floor < r]
949 siblings = [r for r in siblings if floor < r]
950 yield tuple(sorted(siblings))
950 yield tuple(sorted(siblings))
951 # then test the base from our parent's delta chain.
951 # then test the base from our parent's delta chain.
952 yield tuple(sorted(snaps))
952 yield tuple(sorted(snaps))
953 floor = min(snaps)
953 floor = min(snaps)
954 # No suitable base found in the parent chain, search if any full
954 # No suitable base found in the parent chain, search if any full
955 # snapshots emitted since parent's base would be a suitable base for an
955 # snapshots emitted since parent's base would be a suitable base for an
956 # intermediate snapshot.
956 # intermediate snapshot.
957 #
957 #
958 # It give a chance to reuse a delta chain unrelated to the current
958 # It give a chance to reuse a delta chain unrelated to the current
959 # revisions instead of starting our own. Without such re-use,
959 # revisions instead of starting our own. Without such re-use,
960 # topological branches would keep reopening new full chains. Creating
960 # topological branches would keep reopening new full chains. Creating
961 # more and more snapshot as the repository grow.
961 # more and more snapshot as the repository grow.
962 yield tuple(snapshots[nullrev])
962 yield tuple(snapshots[nullrev])
963
963
964 if not sparse:
964 if not sparse:
965 # other approach failed try against prev to hopefully save us a
965 # other approach failed try against prev to hopefully save us a
966 # fulltext.
966 # fulltext.
967 yield (prev,)
967 yield (prev,)
968
968
969
969
970 class deltacomputer:
970 class deltacomputer:
971 def __init__(
971 def __init__(
972 self,
972 self,
973 revlog,
973 revlog,
974 write_debug=None,
974 write_debug=None,
975 debug_search=False,
975 debug_search=False,
976 debug_info=None,
976 debug_info=None,
977 ):
977 ):
978 self.revlog = revlog
978 self.revlog = revlog
979 self._write_debug = write_debug
979 self._write_debug = write_debug
980 self._debug_search = debug_search
980 self._debug_search = debug_search
981 self._debug_info = debug_info
981 self._debug_info = debug_info
982
982
983 def buildtext(self, revinfo, fh):
983 def buildtext(self, revinfo, fh):
984 """Builds a fulltext version of a revision
984 """Builds a fulltext version of a revision
985
985
986 revinfo: revisioninfo instance that contains all needed info
986 revinfo: revisioninfo instance that contains all needed info
987 fh: file handle to either the .i or the .d revlog file,
987 fh: file handle to either the .i or the .d revlog file,
988 depending on whether it is inlined or not
988 depending on whether it is inlined or not
989 """
989 """
990 btext = revinfo.btext
990 btext = revinfo.btext
991 if btext[0] is not None:
991 if btext[0] is not None:
992 return btext[0]
992 return btext[0]
993
993
994 revlog = self.revlog
994 revlog = self.revlog
995 cachedelta = revinfo.cachedelta
995 cachedelta = revinfo.cachedelta
996 baserev = cachedelta[0]
996 baserev = cachedelta[0]
997 delta = cachedelta[1]
997 delta = cachedelta[1]
998
998
999 fulltext = btext[0] = _textfromdelta(
999 fulltext = btext[0] = _textfromdelta(
1000 fh,
1000 fh,
1001 revlog,
1001 revlog,
1002 baserev,
1002 baserev,
1003 delta,
1003 delta,
1004 revinfo.p1,
1004 revinfo.p1,
1005 revinfo.p2,
1005 revinfo.p2,
1006 revinfo.flags,
1006 revinfo.flags,
1007 revinfo.node,
1007 revinfo.node,
1008 )
1008 )
1009 return fulltext
1009 return fulltext
1010
1010
1011 def _builddeltadiff(self, base, revinfo, fh):
1011 def _builddeltadiff(self, base, revinfo, fh):
1012 revlog = self.revlog
1012 revlog = self.revlog
1013 t = self.buildtext(revinfo, fh)
1013 t = self.buildtext(revinfo, fh)
1014 if revlog.iscensored(base):
1014 if revlog.iscensored(base):
1015 # deltas based on a censored revision must replace the
1015 # deltas based on a censored revision must replace the
1016 # full content in one patch, so delta works everywhere
1016 # full content in one patch, so delta works everywhere
1017 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
1017 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
1018 delta = header + t
1018 delta = header + t
1019 else:
1019 else:
1020 ptext = revlog.rawdata(base, _df=fh)
1020 ptext = revlog.rawdata(base, _df=fh)
1021 delta = mdiff.textdiff(ptext, t)
1021 delta = mdiff.textdiff(ptext, t)
1022
1022
1023 return delta
1023 return delta
1024
1024
1025 def _builddeltainfo(self, revinfo, base, fh):
1025 def _builddeltainfo(self, revinfo, base, fh):
1026 # can we use the cached delta?
1026 # can we use the cached delta?
1027 revlog = self.revlog
1027 revlog = self.revlog
1028 debug_search = self._write_debug is not None and self._debug_search
1028 debug_search = self._write_debug is not None and self._debug_search
1029 chainbase = revlog.chainbase(base)
1029 chainbase = revlog.chainbase(base)
1030 if revlog._generaldelta:
1030 if revlog._generaldelta:
1031 deltabase = base
1031 deltabase = base
1032 else:
1032 else:
1033 deltabase = chainbase
1033 deltabase = chainbase
1034 snapshotdepth = None
1034 snapshotdepth = None
1035 if revlog._sparserevlog and deltabase == nullrev:
1035 if revlog._sparserevlog and deltabase == nullrev:
1036 snapshotdepth = 0
1036 snapshotdepth = 0
1037 elif revlog._sparserevlog and revlog.issnapshot(deltabase):
1037 elif revlog._sparserevlog and revlog.issnapshot(deltabase):
1038 # A delta chain should always be one full snapshot,
1038 # A delta chain should always be one full snapshot,
1039 # zero or more semi-snapshots, and zero or more deltas
1039 # zero or more semi-snapshots, and zero or more deltas
1040 p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
1040 p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
1041 if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
1041 if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
1042 snapshotdepth = len(revlog._deltachain(deltabase)[0])
1042 snapshotdepth = len(revlog._deltachain(deltabase)[0])
1043 delta = None
1043 delta = None
1044 if revinfo.cachedelta:
1044 if revinfo.cachedelta:
1045 cachebase, cachediff = revinfo.cachedelta
1045 cachebase, cachediff = revinfo.cachedelta
1046 # check if the diff still apply
1046 # check if the diff still apply
1047 currentbase = cachebase
1047 currentbase = cachebase
1048 while (
1048 while (
1049 currentbase != nullrev
1049 currentbase != nullrev
1050 and currentbase != base
1050 and currentbase != base
1051 and self.revlog.length(currentbase) == 0
1051 and self.revlog.length(currentbase) == 0
1052 ):
1052 ):
1053 currentbase = self.revlog.deltaparent(currentbase)
1053 currentbase = self.revlog.deltaparent(currentbase)
1054 if self.revlog._lazydelta and currentbase == base:
1054 if self.revlog._lazydelta and currentbase == base:
1055 delta = revinfo.cachedelta[1]
1055 delta = revinfo.cachedelta[1]
1056 if delta is None:
1056 if delta is None:
1057 delta = self._builddeltadiff(base, revinfo, fh)
1057 delta = self._builddeltadiff(base, revinfo, fh)
1058 if debug_search:
1058 if debug_search:
1059 msg = b"DBG-DELTAS-SEARCH: uncompressed-delta-size=%d\n"
1059 msg = b"DBG-DELTAS-SEARCH: uncompressed-delta-size=%d\n"
1060 msg %= len(delta)
1060 msg %= len(delta)
1061 self._write_debug(msg)
1061 self._write_debug(msg)
1062 # snapshotdept need to be neither None nor 0 level snapshot
1062 # snapshotdept need to be neither None nor 0 level snapshot
1063 if revlog.upperboundcomp is not None and snapshotdepth:
1063 if revlog.upperboundcomp is not None and snapshotdepth:
1064 lowestrealisticdeltalen = len(delta) // revlog.upperboundcomp
1064 lowestrealisticdeltalen = len(delta) // revlog.upperboundcomp
1065 snapshotlimit = revinfo.textlen >> snapshotdepth
1065 snapshotlimit = revinfo.textlen >> snapshotdepth
1066 if debug_search:
1066 if debug_search:
1067 msg = b"DBG-DELTAS-SEARCH: projected-lower-size=%d\n"
1067 msg = b"DBG-DELTAS-SEARCH: projected-lower-size=%d\n"
1068 msg %= lowestrealisticdeltalen
1068 msg %= lowestrealisticdeltalen
1069 self._write_debug(msg)
1069 self._write_debug(msg)
1070 if snapshotlimit < lowestrealisticdeltalen:
1070 if snapshotlimit < lowestrealisticdeltalen:
1071 if debug_search:
1071 if debug_search:
1072 msg = b"DBG-DELTAS-SEARCH: DISCARDED (snapshot limit)\n"
1072 msg = b"DBG-DELTAS-SEARCH: DISCARDED (snapshot limit)\n"
1073 self._write_debug(msg)
1073 self._write_debug(msg)
1074 return None
1074 return None
1075 if revlog.length(base) < lowestrealisticdeltalen:
1075 if revlog.length(base) < lowestrealisticdeltalen:
1076 if debug_search:
1076 if debug_search:
1077 msg = b"DBG-DELTAS-SEARCH: DISCARDED (prev size)\n"
1077 msg = b"DBG-DELTAS-SEARCH: DISCARDED (prev size)\n"
1078 self._write_debug(msg)
1078 self._write_debug(msg)
1079 return None
1079 return None
1080 header, data = revlog.compress(delta)
1080 header, data = revlog.compress(delta)
1081 deltalen = len(header) + len(data)
1081 deltalen = len(header) + len(data)
1082 offset = revlog.end(len(revlog) - 1)
1082 offset = revlog.end(len(revlog) - 1)
1083 dist = deltalen + offset - revlog.start(chainbase)
1083 dist = deltalen + offset - revlog.start(chainbase)
1084 chainlen, compresseddeltalen = revlog._chaininfo(base)
1084 chainlen, compresseddeltalen = revlog._chaininfo(base)
1085 chainlen += 1
1085 chainlen += 1
1086 compresseddeltalen += deltalen
1086 compresseddeltalen += deltalen
1087
1087
1088 return _deltainfo(
1088 return _deltainfo(
1089 dist,
1089 dist,
1090 deltalen,
1090 deltalen,
1091 (header, data),
1091 (header, data),
1092 deltabase,
1092 deltabase,
1093 chainbase,
1093 chainbase,
1094 chainlen,
1094 chainlen,
1095 compresseddeltalen,
1095 compresseddeltalen,
1096 snapshotdepth,
1096 snapshotdepth,
1097 )
1097 )
1098
1098
1099 def _fullsnapshotinfo(self, fh, revinfo, curr):
1099 def _fullsnapshotinfo(self, fh, revinfo, curr):
1100 rawtext = self.buildtext(revinfo, fh)
1100 rawtext = self.buildtext(revinfo, fh)
1101 data = self.revlog.compress(rawtext)
1101 data = self.revlog.compress(rawtext)
1102 compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
1102 compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
1103 deltabase = chainbase = curr
1103 deltabase = chainbase = curr
1104 snapshotdepth = 0
1104 snapshotdepth = 0
1105 chainlen = 1
1105 chainlen = 1
1106
1106
1107 return _deltainfo(
1107 return _deltainfo(
1108 dist,
1108 dist,
1109 deltalen,
1109 deltalen,
1110 data,
1110 data,
1111 deltabase,
1111 deltabase,
1112 chainbase,
1112 chainbase,
1113 chainlen,
1113 chainlen,
1114 compresseddeltalen,
1114 compresseddeltalen,
1115 snapshotdepth,
1115 snapshotdepth,
1116 )
1116 )
1117
1117
1118 def finddeltainfo(self, revinfo, fh, excluded_bases=None, target_rev=None):
1118 def finddeltainfo(self, revinfo, fh, excluded_bases=None, target_rev=None):
1119 """Find an acceptable delta against a candidate revision
1119 """Find an acceptable delta against a candidate revision
1120
1120
1121 revinfo: information about the revision (instance of _revisioninfo)
1121 revinfo: information about the revision (instance of _revisioninfo)
1122 fh: file handle to either the .i or the .d revlog file,
1122 fh: file handle to either the .i or the .d revlog file,
1123 depending on whether it is inlined or not
1123 depending on whether it is inlined or not
1124
1124
1125 Returns the first acceptable candidate revision, as ordered by
1125 Returns the first acceptable candidate revision, as ordered by
1126 _candidategroups
1126 _candidategroups
1127
1127
1128 If no suitable deltabase is found, we return delta info for a full
1128 If no suitable deltabase is found, we return delta info for a full
1129 snapshot.
1129 snapshot.
1130
1130
1131 `excluded_bases` is an optional set of revision that cannot be used as
1131 `excluded_bases` is an optional set of revision that cannot be used as
1132 a delta base. Use this to recompute delta suitable in censor or strip
1132 a delta base. Use this to recompute delta suitable in censor or strip
1133 context.
1133 context.
1134 """
1134 """
1135 if target_rev is None:
1135 if target_rev is None:
1136 target_rev = len(self.revlog)
1136 target_rev = len(self.revlog)
1137
1137
1138 if not revinfo.textlen:
1138 if not revinfo.textlen:
1139 return self._fullsnapshotinfo(fh, revinfo, target_rev)
1139 return self._fullsnapshotinfo(fh, revinfo, target_rev)
1140
1140
1141 if excluded_bases is None:
1141 if excluded_bases is None:
1142 excluded_bases = set()
1142 excluded_bases = set()
1143
1143
1144 # no delta for flag processor revision (see "candelta" for why)
1144 # no delta for flag processor revision (see "candelta" for why)
1145 # not calling candelta since only one revision needs test, also to
1145 # not calling candelta since only one revision needs test, also to
1146 # avoid overhead fetching flags again.
1146 # avoid overhead fetching flags again.
1147 if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
1147 if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
1148 return self._fullsnapshotinfo(fh, revinfo, target_rev)
1148 return self._fullsnapshotinfo(fh, revinfo, target_rev)
1149
1149
1150 gather_debug = (
1150 gather_debug = (
1151 self._write_debug is not None or self._debug_info is not None
1151 self._write_debug is not None or self._debug_info is not None
1152 )
1152 )
1153 debug_search = self._write_debug is not None and self._debug_search
1153 debug_search = self._write_debug is not None and self._debug_search
1154
1154
1155 if gather_debug:
1155 if gather_debug:
1156 start = util.timer()
1156 start = util.timer()
1157
1157
1158 # count the number of different delta we tried (for debug purpose)
1158 # count the number of different delta we tried (for debug purpose)
1159 dbg_try_count = 0
1159 dbg_try_count = 0
1160 # count the number of "search round" we did. (for debug purpose)
1160 # count the number of "search round" we did. (for debug purpose)
1161 dbg_try_rounds = 0
1161 dbg_try_rounds = 0
1162 dbg_type = b'unknown'
1162 dbg_type = b'unknown'
1163
1163
1164 cachedelta = revinfo.cachedelta
1164 cachedelta = revinfo.cachedelta
1165 p1 = revinfo.p1
1165 p1 = revinfo.p1
1166 p2 = revinfo.p2
1166 p2 = revinfo.p2
1167 revlog = self.revlog
1167 revlog = self.revlog
1168
1168
1169 deltainfo = None
1169 deltainfo = None
1170 p1r, p2r = revlog.rev(p1), revlog.rev(p2)
1170 p1r, p2r = revlog.rev(p1), revlog.rev(p2)
1171
1171
1172 if gather_debug:
1172 if gather_debug:
1173 if p1r != nullrev:
1173 if p1r != nullrev:
1174 p1_chain_len = revlog._chaininfo(p1r)[0]
1174 p1_chain_len = revlog._chaininfo(p1r)[0]
1175 else:
1175 else:
1176 p1_chain_len = -1
1176 p1_chain_len = -1
1177 if p2r != nullrev:
1177 if p2r != nullrev:
1178 p2_chain_len = revlog._chaininfo(p2r)[0]
1178 p2_chain_len = revlog._chaininfo(p2r)[0]
1179 else:
1179 else:
1180 p2_chain_len = -1
1180 p2_chain_len = -1
1181 if debug_search:
1181 if debug_search:
1182 msg = b"DBG-DELTAS-SEARCH: SEARCH rev=%d\n"
1182 msg = b"DBG-DELTAS-SEARCH: SEARCH rev=%d\n"
1183 msg %= target_rev
1183 msg %= target_rev
1184 self._write_debug(msg)
1184 self._write_debug(msg)
1185
1185
1186 groups = _candidategroups(
1186 groups = _candidategroups(
1187 self.revlog,
1187 self.revlog,
1188 revinfo.textlen,
1188 revinfo.textlen,
1189 p1r,
1189 p1r,
1190 p2r,
1190 p2r,
1191 cachedelta,
1191 cachedelta,
1192 excluded_bases,
1192 excluded_bases,
1193 target_rev,
1193 target_rev,
1194 )
1194 )
1195 candidaterevs = next(groups)
1195 candidaterevs = next(groups)
1196 while candidaterevs is not None:
1196 while candidaterevs is not None:
1197 dbg_try_rounds += 1
1197 dbg_try_rounds += 1
1198 if debug_search:
1198 if debug_search:
1199 prev = None
1199 prev = None
1200 if deltainfo is not None:
1200 if deltainfo is not None:
1201 prev = deltainfo.base
1201 prev = deltainfo.base
1202
1202
1203 if (
1203 if (
1204 cachedelta is not None
1204 cachedelta is not None
1205 and len(candidaterevs) == 1
1205 and len(candidaterevs) == 1
1206 and cachedelta[0] in candidaterevs
1206 and cachedelta[0] in candidaterevs
1207 ):
1207 ):
1208 round_type = b"cached-delta"
1208 round_type = b"cached-delta"
1209 elif p1 in candidaterevs or p2 in candidaterevs:
1209 elif p1 in candidaterevs or p2 in candidaterevs:
1210 round_type = b"parents"
1210 round_type = b"parents"
1211 elif prev is not None and all(c < prev for c in candidaterevs):
1211 elif prev is not None and all(c < prev for c in candidaterevs):
1212 round_type = b"refine-down"
1212 round_type = b"refine-down"
1213 elif prev is not None and all(c > prev for c in candidaterevs):
1213 elif prev is not None and all(c > prev for c in candidaterevs):
1214 round_type = b"refine-up"
1214 round_type = b"refine-up"
1215 else:
1215 else:
1216 round_type = b"search-down"
1216 round_type = b"search-down"
1217 msg = b"DBG-DELTAS-SEARCH: ROUND #%d - %d candidates - %s\n"
1217 msg = b"DBG-DELTAS-SEARCH: ROUND #%d - %d candidates - %s\n"
1218 msg %= (dbg_try_rounds, len(candidaterevs), round_type)
1218 msg %= (dbg_try_rounds, len(candidaterevs), round_type)
1219 self._write_debug(msg)
1219 self._write_debug(msg)
1220 nominateddeltas = []
1220 nominateddeltas = []
1221 if deltainfo is not None:
1221 if deltainfo is not None:
1222 if debug_search:
1222 if debug_search:
1223 msg = (
1223 msg = (
1224 b"DBG-DELTAS-SEARCH: CONTENDER: rev=%d - length=%d\n"
1224 b"DBG-DELTAS-SEARCH: CONTENDER: rev=%d - length=%d\n"
1225 )
1225 )
1226 msg %= (deltainfo.base, deltainfo.deltalen)
1226 msg %= (deltainfo.base, deltainfo.deltalen)
1227 self._write_debug(msg)
1227 self._write_debug(msg)
1228 # if we already found a good delta,
1228 # if we already found a good delta,
1229 # challenge it against refined candidates
1229 # challenge it against refined candidates
1230 nominateddeltas.append(deltainfo)
1230 nominateddeltas.append(deltainfo)
1231 for candidaterev in candidaterevs:
1231 for candidaterev in candidaterevs:
1232 if debug_search:
1232 if debug_search:
1233 msg = b"DBG-DELTAS-SEARCH: CANDIDATE: rev=%d\n"
1233 msg = b"DBG-DELTAS-SEARCH: CANDIDATE: rev=%d\n"
1234 msg %= candidaterev
1234 msg %= candidaterev
1235 self._write_debug(msg)
1235 self._write_debug(msg)
1236 candidate_type = None
1236 candidate_type = None
1237 if candidaterev == p1:
1237 if candidaterev == p1:
1238 candidate_type = b"p1"
1238 candidate_type = b"p1"
1239 elif candidaterev == p2:
1239 elif candidaterev == p2:
1240 candidate_type = b"p2"
1240 candidate_type = b"p2"
1241 elif self.revlog.issnapshot(candidaterev):
1241 elif self.revlog.issnapshot(candidaterev):
1242 candidate_type = b"snapshot-%d"
1242 candidate_type = b"snapshot-%d"
1243 candidate_type %= self.revlog.snapshotdepth(
1243 candidate_type %= self.revlog.snapshotdepth(
1244 candidaterev
1244 candidaterev
1245 )
1245 )
1246
1246
1247 if candidate_type is not None:
1247 if candidate_type is not None:
1248 msg = b"DBG-DELTAS-SEARCH: type=%s\n"
1248 msg = b"DBG-DELTAS-SEARCH: type=%s\n"
1249 msg %= candidate_type
1249 msg %= candidate_type
1250 self._write_debug(msg)
1250 self._write_debug(msg)
1251 msg = b"DBG-DELTAS-SEARCH: size=%d\n"
1251 msg = b"DBG-DELTAS-SEARCH: size=%d\n"
1252 msg %= self.revlog.length(candidaterev)
1252 msg %= self.revlog.length(candidaterev)
1253 self._write_debug(msg)
1253 self._write_debug(msg)
1254 msg = b"DBG-DELTAS-SEARCH: base=%d\n"
1254 msg = b"DBG-DELTAS-SEARCH: base=%d\n"
1255 msg %= self.revlog.deltaparent(candidaterev)
1255 msg %= self.revlog.deltaparent(candidaterev)
1256 self._write_debug(msg)
1256 self._write_debug(msg)
1257
1257
1258 dbg_try_count += 1
1258 dbg_try_count += 1
1259
1259
1260 if debug_search:
1260 if debug_search:
1261 delta_start = util.timer()
1261 delta_start = util.timer()
1262 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
1262 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
1263 if debug_search:
1263 if debug_search:
1264 delta_end = util.timer()
1264 delta_end = util.timer()
1265 msg = b"DBG-DELTAS-SEARCH: delta-search-time=%f\n"
1265 msg = b"DBG-DELTAS-SEARCH: delta-search-time=%f\n"
1266 msg %= delta_end - delta_start
1266 msg %= delta_end - delta_start
1267 self._write_debug(msg)
1267 self._write_debug(msg)
1268 if candidatedelta is not None:
1268 if candidatedelta is not None:
1269 if isgooddeltainfo(self.revlog, candidatedelta, revinfo):
1269 if isgooddeltainfo(self.revlog, candidatedelta, revinfo):
1270 if debug_search:
1270 if debug_search:
1271 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (GOOD)\n"
1271 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (GOOD)\n"
1272 msg %= candidatedelta.deltalen
1272 msg %= candidatedelta.deltalen
1273 self._write_debug(msg)
1273 self._write_debug(msg)
1274 nominateddeltas.append(candidatedelta)
1274 nominateddeltas.append(candidatedelta)
1275 elif debug_search:
1275 elif debug_search:
1276 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (BAD)\n"
1276 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (BAD)\n"
1277 msg %= candidatedelta.deltalen
1277 msg %= candidatedelta.deltalen
1278 self._write_debug(msg)
1278 self._write_debug(msg)
1279 elif debug_search:
1279 elif debug_search:
1280 msg = b"DBG-DELTAS-SEARCH: NO-DELTA\n"
1280 msg = b"DBG-DELTAS-SEARCH: NO-DELTA\n"
1281 self._write_debug(msg)
1281 self._write_debug(msg)
1282 if nominateddeltas:
1282 if nominateddeltas:
1283 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
1283 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
1284 if deltainfo is not None:
1284 if deltainfo is not None:
1285 candidaterevs = groups.send(deltainfo.base)
1285 candidaterevs = groups.send(deltainfo.base)
1286 else:
1286 else:
1287 candidaterevs = next(groups)
1287 candidaterevs = next(groups)
1288
1288
1289 if deltainfo is None:
1289 if deltainfo is None:
1290 dbg_type = b"full"
1290 dbg_type = b"full"
1291 deltainfo = self._fullsnapshotinfo(fh, revinfo, target_rev)
1291 deltainfo = self._fullsnapshotinfo(fh, revinfo, target_rev)
1292 elif deltainfo.snapshotdepth: # pytype: disable=attribute-error
1292 elif deltainfo.snapshotdepth: # pytype: disable=attribute-error
1293 dbg_type = b"snapshot"
1293 dbg_type = b"snapshot"
1294 else:
1294 else:
1295 dbg_type = b"delta"
1295 dbg_type = b"delta"
1296
1296
1297 if gather_debug:
1297 if gather_debug:
1298 end = util.timer()
1298 end = util.timer()
1299 used_cached = (
1299 used_cached = (
1300 cachedelta is not None
1300 cachedelta is not None
1301 and dbg_try_rounds == 1
1301 and dbg_try_rounds == 1
1302 and dbg_try_count == 1
1302 and dbg_try_count == 1
1303 and deltainfo.base == cachedelta[0]
1303 and deltainfo.base == cachedelta[0]
1304 )
1304 )
1305 dbg = {
1305 dbg = {
1306 'duration': end - start,
1306 'duration': end - start,
1307 'revision': target_rev,
1307 'revision': target_rev,
1308 'delta-base': deltainfo.base, # pytype: disable=attribute-error
1308 'delta-base': deltainfo.base, # pytype: disable=attribute-error
1309 'search_round_count': dbg_try_rounds,
1309 'search_round_count': dbg_try_rounds,
1310 'using-cached-base': used_cached,
1310 'using-cached-base': used_cached,
1311 'delta_try_count': dbg_try_count,
1311 'delta_try_count': dbg_try_count,
1312 'type': dbg_type,
1312 'type': dbg_type,
1313 'p1-chain-len': p1_chain_len,
1313 'p1-chain-len': p1_chain_len,
1314 'p2-chain-len': p2_chain_len,
1314 'p2-chain-len': p2_chain_len,
1315 }
1315 }
1316 if (
1316 if (
1317 deltainfo.snapshotdepth # pytype: disable=attribute-error
1317 deltainfo.snapshotdepth # pytype: disable=attribute-error
1318 is not None
1318 is not None
1319 ):
1319 ):
1320 dbg[
1320 dbg[
1321 'snapshot-depth'
1321 'snapshot-depth'
1322 ] = deltainfo.snapshotdepth # pytype: disable=attribute-error
1322 ] = deltainfo.snapshotdepth # pytype: disable=attribute-error
1323 else:
1323 else:
1324 dbg['snapshot-depth'] = 0
1324 dbg['snapshot-depth'] = 0
1325 target_revlog = b"UNKNOWN"
1325 target_revlog = b"UNKNOWN"
1326 target_type = self.revlog.target[0]
1326 target_type = self.revlog.target[0]
1327 target_key = self.revlog.target[1]
1327 target_key = self.revlog.target[1]
1328 if target_type == KIND_CHANGELOG:
1328 if target_type == KIND_CHANGELOG:
1329 target_revlog = b'CHANGELOG:'
1329 target_revlog = b'CHANGELOG:'
1330 elif target_type == KIND_MANIFESTLOG:
1330 elif target_type == KIND_MANIFESTLOG:
1331 target_revlog = b'MANIFESTLOG:'
1331 target_revlog = b'MANIFESTLOG:'
1332 if target_key:
1332 if target_key:
1333 target_revlog += b'%s:' % target_key
1333 target_revlog += b'%s:' % target_key
1334 elif target_type == KIND_FILELOG:
1334 elif target_type == KIND_FILELOG:
1335 target_revlog = b'FILELOG:'
1335 target_revlog = b'FILELOG:'
1336 if target_key:
1336 if target_key:
1337 target_revlog += b'%s:' % target_key
1337 target_revlog += b'%s:' % target_key
1338 dbg['target-revlog'] = target_revlog
1338 dbg['target-revlog'] = target_revlog
1339
1339
1340 if self._debug_info is not None:
1340 if self._debug_info is not None:
1341 self._debug_info.append(dbg)
1341 self._debug_info.append(dbg)
1342
1342
1343 if self._write_debug is not None:
1343 if self._write_debug is not None:
1344 msg = (
1344 msg = (
1345 b"DBG-DELTAS:"
1345 b"DBG-DELTAS:"
1346 b" %-12s"
1346 b" %-12s"
1347 b" rev=%d:"
1347 b" rev=%d:"
1348 b" delta-base=%d"
1348 b" delta-base=%d"
1349 b" is-cached=%d"
1349 b" is-cached=%d"
1350 b" - search-rounds=%d"
1350 b" - search-rounds=%d"
1351 b" try-count=%d"
1351 b" try-count=%d"
1352 b" - delta-type=%-6s"
1352 b" - delta-type=%-6s"
1353 b" snap-depth=%d"
1353 b" snap-depth=%d"
1354 b" - p1-chain-length=%d"
1354 b" - p1-chain-length=%d"
1355 b" p2-chain-length=%d"
1355 b" p2-chain-length=%d"
1356 b" - duration=%f"
1356 b" - duration=%f"
1357 b"\n"
1357 b"\n"
1358 )
1358 )
1359 msg %= (
1359 msg %= (
1360 dbg["target-revlog"],
1360 dbg["target-revlog"],
1361 dbg["revision"],
1361 dbg["revision"],
1362 dbg["delta-base"],
1362 dbg["delta-base"],
1363 dbg["using-cached-base"],
1363 dbg["using-cached-base"],
1364 dbg["search_round_count"],
1364 dbg["search_round_count"],
1365 dbg["delta_try_count"],
1365 dbg["delta_try_count"],
1366 dbg["type"],
1366 dbg["type"],
1367 dbg["snapshot-depth"],
1367 dbg["snapshot-depth"],
1368 dbg["p1-chain-len"],
1368 dbg["p1-chain-len"],
1369 dbg["p2-chain-len"],
1369 dbg["p2-chain-len"],
1370 dbg["duration"],
1370 dbg["duration"],
1371 )
1371 )
1372 self._write_debug(msg)
1372 self._write_debug(msg)
1373 return deltainfo
1373 return deltainfo
1374
1374
1375
1375
1376 def delta_compression(default_compression_header, deltainfo):
1376 def delta_compression(default_compression_header, deltainfo):
1377 """return (COMPRESSION_MODE, deltainfo)
1377 """return (COMPRESSION_MODE, deltainfo)
1378
1378
1379 used by revlog v2+ format to dispatch between PLAIN and DEFAULT
1379 used by revlog v2+ format to dispatch between PLAIN and DEFAULT
1380 compression.
1380 compression.
1381 """
1381 """
1382 h, d = deltainfo.data
1382 h, d = deltainfo.data
1383 compression_mode = COMP_MODE_INLINE
1383 compression_mode = COMP_MODE_INLINE
1384 if not h and not d:
1384 if not h and not d:
1385 # not data to store at all... declare them uncompressed
1385 # not data to store at all... declare them uncompressed
1386 compression_mode = COMP_MODE_PLAIN
1386 compression_mode = COMP_MODE_PLAIN
1387 elif not h:
1387 elif not h:
1388 t = d[0:1]
1388 t = d[0:1]
1389 if t == b'\0':
1389 if t == b'\0':
1390 compression_mode = COMP_MODE_PLAIN
1390 compression_mode = COMP_MODE_PLAIN
1391 elif t == default_compression_header:
1391 elif t == default_compression_header:
1392 compression_mode = COMP_MODE_DEFAULT
1392 compression_mode = COMP_MODE_DEFAULT
1393 elif h == b'u':
1393 elif h == b'u':
1394 # we have a more efficient way to declare uncompressed
1394 # we have a more efficient way to declare uncompressed
1395 h = b''
1395 h = b''
1396 compression_mode = COMP_MODE_PLAIN
1396 compression_mode = COMP_MODE_PLAIN
1397 deltainfo = drop_u_compression(deltainfo)
1397 deltainfo = drop_u_compression(deltainfo)
1398 return compression_mode, deltainfo
1398 return compression_mode, deltainfo
General Comments 0
You need to be logged in to leave comments. Login now