##// END OF EJS Templates
delta-find: never do anything fancy when general delta is off...
marmoute -
r51331:02325712 stable
parent child Browse files
Show More
@@ -0,0 +1,196 b''
1 ============================================================================
2 Pulling from modern to a non-general delta target (and other related checks)
3 ============================================================================
4
5 There is various issue that can arise when we update the code with modern
6 storage in mind while working on delta processing. So this file is meant for
7 various scenario that might break in the future or have break in the past.
8
9 Setup
10 =====
11
12 Create a modern server with an older clone
13
14 $ cat << EOF >> $HGRCPATH
15 > [command-templates]
16 > log = "{desc} {tags}\n"
17 > EOF
18
19 $ hg init server
20
21 $ hg clone --quiet --pull server client --config format.usegeneraldelta=no
22 $ hg debugformat -R client | grep generaldelta
23 generaldelta: no
24
25 Create some complexe history
26
27 $ cd server
28 $ hg debugbuilddag -n '.+3:a$.+5:b/a:k$.+7:c/b:l$.+6:d/a:m<k+6/l+1/m'
29 $ hg log -G
30 o r36 tip
31 |\
32 | o r35
33 | |
34 | o r34
35 | |\
36 | | o r33
37 | | |
38 | | o r32
39 | | |
40 | | o r31
41 | | |
42 | | o r30
43 | | |
44 | | o r29
45 | | |
46 | | o r28
47 | | |
48 o | | r27 m
49 |\ \ \
50 | o | | r26 d
51 | | | |
52 | o | | r25
53 | | | |
54 | o | | r24
55 | | | |
56 | o | | r23
57 | | | |
58 | o | | r22
59 | | | |
60 | o | | r21
61 | | | |
62 | o | | r20
63 | / /
64 | o | r19 l
65 | |\ \
66 | | o | r18 c
67 | | | |
68 | | o | r17
69 | | | |
70 | | o | r16
71 | | | |
72 | | o | r15
73 | | | |
74 | | o | r14
75 | | | |
76 | | o | r13
77 | | | |
78 | | o | r12
79 | | | |
80 | | o | r11
81 | | /
82 +---o r10 k
83 | |/
84 | o r9 b
85 | |
86 | o r8
87 | |
88 | o r7
89 | |
90 | o r6
91 | |
92 | o r5
93 | |
94 | o r4
95 |
96 o r3 a
97 |
98 o r2
99 |
100 o r1
101 |
102 o r0
103
104 $ cd ..
105
106
107 Pull it in the client
108 =====================
109
110
111 pull with default value
112 -----------------------
113
114 $ cp -R client client-simple-pull
115 $ hg -R client-simple-pull pull
116 pulling from $TESTTMP/server
117 requesting all changes
118 adding changesets
119 adding manifests
120 adding file changes
121 added 37 changesets with 37 changes to 37 files
122 new changesets 61246295ee1e:b4b117cbbcf3
123 (run 'hg update' to get a working copy)
124 $ hg -R client-simple-pull verify
125 checking changesets
126 checking manifests
127 crosschecking files in changesets and manifests
128 checking files
129 checking dirstate
130 checked 37 changesets with 37 changes to 37 files
131
132
133 pull with "no-reuse" policy
134 ---------------------------
135
136 $ cp -R client client-no-reuse
137 $ hg -R client-no-reuse pull --config paths.default:pulled-delta-reuse-policy=no-reuse
138 pulling from $TESTTMP/server
139 requesting all changes
140 adding changesets
141 adding manifests
142 adding file changes
143 added 37 changesets with 37 changes to 37 files
144 new changesets 61246295ee1e:b4b117cbbcf3
145 (run 'hg update' to get a working copy)
146 $ hg -R client-no-reuse verify
147 checking changesets
148 checking manifests
149 crosschecking files in changesets and manifests
150 checking files
151 checking dirstate
152 checked 37 changesets with 37 changes to 37 files
153
154
155 pull with "try-base" policy
156 ---------------------------
157
158 $ cp -R client client-try-base
159 $ hg -R client-try-base pull --config paths.default:pulled-delta-reuse-policy=try-base
160 pulling from $TESTTMP/server
161 requesting all changes
162 adding changesets
163 adding manifests
164 adding file changes
165 added 37 changesets with 37 changes to 37 files
166 new changesets 61246295ee1e:b4b117cbbcf3
167 (run 'hg update' to get a working copy)
168 $ hg -R client-try-base verify
169 checking changesets
170 checking manifests
171 crosschecking files in changesets and manifests
172 checking files
173 checking dirstate
174 checked 37 changesets with 37 changes to 37 files
175
176
177 pull with "forced" policy
178 -------------------------
179
180 $ cp -R client client-forced
181 $ hg -R client-forced pull --config paths.default:pulled-delta-reuse-policy=forced
182 pulling from $TESTTMP/server
183 requesting all changes
184 adding changesets
185 adding manifests
186 adding file changes
187 added 37 changesets with 37 changes to 37 files
188 new changesets 61246295ee1e:b4b117cbbcf3
189 (run 'hg update' to get a working copy)
190 $ hg -R client-forced verify
191 checking changesets
192 checking manifests
193 crosschecking files in changesets and manifests
194 checking files
195 checking dirstate
196 checked 37 changesets with 37 changes to 37 files
@@ -1,1514 +1,1520 b''
1 # revlogdeltas.py - Logic around delta computation for revlog
1 # revlogdeltas.py - Logic around delta computation for revlog
2 #
2 #
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 # Copyright 2018 Octobus <contact@octobus.net>
4 # Copyright 2018 Octobus <contact@octobus.net>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8 """Helper class to compute deltas stored inside revlogs"""
8 """Helper class to compute deltas stored inside revlogs"""
9
9
10
10
11 import collections
11 import collections
12 import struct
12 import struct
13
13
14 # import stuff from node for others to import from revlog
14 # import stuff from node for others to import from revlog
15 from ..node import nullrev
15 from ..node import nullrev
16 from ..i18n import _
16 from ..i18n import _
17 from ..pycompat import getattr
17 from ..pycompat import getattr
18
18
19 from .constants import (
19 from .constants import (
20 COMP_MODE_DEFAULT,
20 COMP_MODE_DEFAULT,
21 COMP_MODE_INLINE,
21 COMP_MODE_INLINE,
22 COMP_MODE_PLAIN,
22 COMP_MODE_PLAIN,
23 DELTA_BASE_REUSE_FORCE,
23 DELTA_BASE_REUSE_FORCE,
24 DELTA_BASE_REUSE_NO,
24 DELTA_BASE_REUSE_NO,
25 KIND_CHANGELOG,
25 KIND_CHANGELOG,
26 KIND_FILELOG,
26 KIND_FILELOG,
27 KIND_MANIFESTLOG,
27 KIND_MANIFESTLOG,
28 REVIDX_ISCENSORED,
28 REVIDX_ISCENSORED,
29 REVIDX_RAWTEXT_CHANGING_FLAGS,
29 REVIDX_RAWTEXT_CHANGING_FLAGS,
30 )
30 )
31
31
32 from ..thirdparty import attr
32 from ..thirdparty import attr
33
33
34 from .. import (
34 from .. import (
35 error,
35 error,
36 mdiff,
36 mdiff,
37 util,
37 util,
38 )
38 )
39
39
40 from . import flagutil
40 from . import flagutil
41
41
42 # maximum <delta-chain-data>/<revision-text-length> ratio
42 # maximum <delta-chain-data>/<revision-text-length> ratio
43 LIMIT_DELTA2TEXT = 2
43 LIMIT_DELTA2TEXT = 2
44
44
45
45
46 class _testrevlog:
46 class _testrevlog:
47 """minimalist fake revlog to use in doctests"""
47 """minimalist fake revlog to use in doctests"""
48
48
49 def __init__(self, data, density=0.5, mingap=0, snapshot=()):
49 def __init__(self, data, density=0.5, mingap=0, snapshot=()):
50 """data is an list of revision payload boundaries"""
50 """data is an list of revision payload boundaries"""
51 self._data = data
51 self._data = data
52 self._srdensitythreshold = density
52 self._srdensitythreshold = density
53 self._srmingapsize = mingap
53 self._srmingapsize = mingap
54 self._snapshot = set(snapshot)
54 self._snapshot = set(snapshot)
55 self.index = None
55 self.index = None
56
56
57 def start(self, rev):
57 def start(self, rev):
58 if rev == nullrev:
58 if rev == nullrev:
59 return 0
59 return 0
60 if rev == 0:
60 if rev == 0:
61 return 0
61 return 0
62 return self._data[rev - 1]
62 return self._data[rev - 1]
63
63
64 def end(self, rev):
64 def end(self, rev):
65 if rev == nullrev:
65 if rev == nullrev:
66 return 0
66 return 0
67 return self._data[rev]
67 return self._data[rev]
68
68
69 def length(self, rev):
69 def length(self, rev):
70 return self.end(rev) - self.start(rev)
70 return self.end(rev) - self.start(rev)
71
71
72 def __len__(self):
72 def __len__(self):
73 return len(self._data)
73 return len(self._data)
74
74
75 def issnapshot(self, rev):
75 def issnapshot(self, rev):
76 if rev == nullrev:
76 if rev == nullrev:
77 return True
77 return True
78 return rev in self._snapshot
78 return rev in self._snapshot
79
79
80
80
81 def slicechunk(revlog, revs, targetsize=None):
81 def slicechunk(revlog, revs, targetsize=None):
82 """slice revs to reduce the amount of unrelated data to be read from disk.
82 """slice revs to reduce the amount of unrelated data to be read from disk.
83
83
84 ``revs`` is sliced into groups that should be read in one time.
84 ``revs`` is sliced into groups that should be read in one time.
85 Assume that revs are sorted.
85 Assume that revs are sorted.
86
86
87 The initial chunk is sliced until the overall density (payload/chunks-span
87 The initial chunk is sliced until the overall density (payload/chunks-span
88 ratio) is above `revlog._srdensitythreshold`. No gap smaller than
88 ratio) is above `revlog._srdensitythreshold`. No gap smaller than
89 `revlog._srmingapsize` is skipped.
89 `revlog._srmingapsize` is skipped.
90
90
91 If `targetsize` is set, no chunk larger than `targetsize` will be yield.
91 If `targetsize` is set, no chunk larger than `targetsize` will be yield.
92 For consistency with other slicing choice, this limit won't go lower than
92 For consistency with other slicing choice, this limit won't go lower than
93 `revlog._srmingapsize`.
93 `revlog._srmingapsize`.
94
94
95 If individual revisions chunk are larger than this limit, they will still
95 If individual revisions chunk are larger than this limit, they will still
96 be raised individually.
96 be raised individually.
97
97
98 >>> data = [
98 >>> data = [
99 ... 5, #00 (5)
99 ... 5, #00 (5)
100 ... 10, #01 (5)
100 ... 10, #01 (5)
101 ... 12, #02 (2)
101 ... 12, #02 (2)
102 ... 12, #03 (empty)
102 ... 12, #03 (empty)
103 ... 27, #04 (15)
103 ... 27, #04 (15)
104 ... 31, #05 (4)
104 ... 31, #05 (4)
105 ... 31, #06 (empty)
105 ... 31, #06 (empty)
106 ... 42, #07 (11)
106 ... 42, #07 (11)
107 ... 47, #08 (5)
107 ... 47, #08 (5)
108 ... 47, #09 (empty)
108 ... 47, #09 (empty)
109 ... 48, #10 (1)
109 ... 48, #10 (1)
110 ... 51, #11 (3)
110 ... 51, #11 (3)
111 ... 74, #12 (23)
111 ... 74, #12 (23)
112 ... 85, #13 (11)
112 ... 85, #13 (11)
113 ... 86, #14 (1)
113 ... 86, #14 (1)
114 ... 91, #15 (5)
114 ... 91, #15 (5)
115 ... ]
115 ... ]
116 >>> revlog = _testrevlog(data, snapshot=range(16))
116 >>> revlog = _testrevlog(data, snapshot=range(16))
117
117
118 >>> list(slicechunk(revlog, list(range(16))))
118 >>> list(slicechunk(revlog, list(range(16))))
119 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
119 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
120 >>> list(slicechunk(revlog, [0, 15]))
120 >>> list(slicechunk(revlog, [0, 15]))
121 [[0], [15]]
121 [[0], [15]]
122 >>> list(slicechunk(revlog, [0, 11, 15]))
122 >>> list(slicechunk(revlog, [0, 11, 15]))
123 [[0], [11], [15]]
123 [[0], [11], [15]]
124 >>> list(slicechunk(revlog, [0, 11, 13, 15]))
124 >>> list(slicechunk(revlog, [0, 11, 13, 15]))
125 [[0], [11, 13, 15]]
125 [[0], [11, 13, 15]]
126 >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
126 >>> list(slicechunk(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
127 [[1, 2], [5, 8, 10, 11], [14]]
127 [[1, 2], [5, 8, 10, 11], [14]]
128
128
129 Slicing with a maximum chunk size
129 Slicing with a maximum chunk size
130 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
130 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=15))
131 [[0], [11], [13], [15]]
131 [[0], [11], [13], [15]]
132 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
132 >>> list(slicechunk(revlog, [0, 11, 13, 15], targetsize=20))
133 [[0], [11], [13, 15]]
133 [[0], [11], [13, 15]]
134
134
135 Slicing involving nullrev
135 Slicing involving nullrev
136 >>> list(slicechunk(revlog, [-1, 0, 11, 13, 15], targetsize=20))
136 >>> list(slicechunk(revlog, [-1, 0, 11, 13, 15], targetsize=20))
137 [[-1, 0], [11], [13, 15]]
137 [[-1, 0], [11], [13, 15]]
138 >>> list(slicechunk(revlog, [-1, 13, 15], targetsize=5))
138 >>> list(slicechunk(revlog, [-1, 13, 15], targetsize=5))
139 [[-1], [13], [15]]
139 [[-1], [13], [15]]
140 """
140 """
141 if targetsize is not None:
141 if targetsize is not None:
142 targetsize = max(targetsize, revlog._srmingapsize)
142 targetsize = max(targetsize, revlog._srmingapsize)
143 # targetsize should not be specified when evaluating delta candidates:
143 # targetsize should not be specified when evaluating delta candidates:
144 # * targetsize is used to ensure we stay within specification when reading,
144 # * targetsize is used to ensure we stay within specification when reading,
145 densityslicing = getattr(revlog.index, 'slicechunktodensity', None)
145 densityslicing = getattr(revlog.index, 'slicechunktodensity', None)
146 if densityslicing is None:
146 if densityslicing is None:
147 densityslicing = lambda x, y, z: _slicechunktodensity(revlog, x, y, z)
147 densityslicing = lambda x, y, z: _slicechunktodensity(revlog, x, y, z)
148 for chunk in densityslicing(
148 for chunk in densityslicing(
149 revs, revlog._srdensitythreshold, revlog._srmingapsize
149 revs, revlog._srdensitythreshold, revlog._srmingapsize
150 ):
150 ):
151 for subchunk in _slicechunktosize(revlog, chunk, targetsize):
151 for subchunk in _slicechunktosize(revlog, chunk, targetsize):
152 yield subchunk
152 yield subchunk
153
153
154
154
155 def _slicechunktosize(revlog, revs, targetsize=None):
155 def _slicechunktosize(revlog, revs, targetsize=None):
156 """slice revs to match the target size
156 """slice revs to match the target size
157
157
158 This is intended to be used on chunk that density slicing selected by that
158 This is intended to be used on chunk that density slicing selected by that
159 are still too large compared to the read garantee of revlog. This might
159 are still too large compared to the read garantee of revlog. This might
160 happens when "minimal gap size" interrupted the slicing or when chain are
160 happens when "minimal gap size" interrupted the slicing or when chain are
161 built in a way that create large blocks next to each other.
161 built in a way that create large blocks next to each other.
162
162
163 >>> data = [
163 >>> data = [
164 ... 3, #0 (3)
164 ... 3, #0 (3)
165 ... 5, #1 (2)
165 ... 5, #1 (2)
166 ... 6, #2 (1)
166 ... 6, #2 (1)
167 ... 8, #3 (2)
167 ... 8, #3 (2)
168 ... 8, #4 (empty)
168 ... 8, #4 (empty)
169 ... 11, #5 (3)
169 ... 11, #5 (3)
170 ... 12, #6 (1)
170 ... 12, #6 (1)
171 ... 13, #7 (1)
171 ... 13, #7 (1)
172 ... 14, #8 (1)
172 ... 14, #8 (1)
173 ... ]
173 ... ]
174
174
175 == All snapshots cases ==
175 == All snapshots cases ==
176 >>> revlog = _testrevlog(data, snapshot=range(9))
176 >>> revlog = _testrevlog(data, snapshot=range(9))
177
177
178 Cases where chunk is already small enough
178 Cases where chunk is already small enough
179 >>> list(_slicechunktosize(revlog, [0], 3))
179 >>> list(_slicechunktosize(revlog, [0], 3))
180 [[0]]
180 [[0]]
181 >>> list(_slicechunktosize(revlog, [6, 7], 3))
181 >>> list(_slicechunktosize(revlog, [6, 7], 3))
182 [[6, 7]]
182 [[6, 7]]
183 >>> list(_slicechunktosize(revlog, [0], None))
183 >>> list(_slicechunktosize(revlog, [0], None))
184 [[0]]
184 [[0]]
185 >>> list(_slicechunktosize(revlog, [6, 7], None))
185 >>> list(_slicechunktosize(revlog, [6, 7], None))
186 [[6, 7]]
186 [[6, 7]]
187
187
188 cases where we need actual slicing
188 cases where we need actual slicing
189 >>> list(_slicechunktosize(revlog, [0, 1], 3))
189 >>> list(_slicechunktosize(revlog, [0, 1], 3))
190 [[0], [1]]
190 [[0], [1]]
191 >>> list(_slicechunktosize(revlog, [1, 3], 3))
191 >>> list(_slicechunktosize(revlog, [1, 3], 3))
192 [[1], [3]]
192 [[1], [3]]
193 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
193 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
194 [[1, 2], [3]]
194 [[1, 2], [3]]
195 >>> list(_slicechunktosize(revlog, [3, 5], 3))
195 >>> list(_slicechunktosize(revlog, [3, 5], 3))
196 [[3], [5]]
196 [[3], [5]]
197 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
197 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
198 [[3], [5]]
198 [[3], [5]]
199 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
199 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
200 [[5], [6, 7, 8]]
200 [[5], [6, 7, 8]]
201 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
201 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
202 [[0], [1, 2], [3], [5], [6, 7, 8]]
202 [[0], [1, 2], [3], [5], [6, 7, 8]]
203
203
204 Case with too large individual chunk (must return valid chunk)
204 Case with too large individual chunk (must return valid chunk)
205 >>> list(_slicechunktosize(revlog, [0, 1], 2))
205 >>> list(_slicechunktosize(revlog, [0, 1], 2))
206 [[0], [1]]
206 [[0], [1]]
207 >>> list(_slicechunktosize(revlog, [1, 3], 1))
207 >>> list(_slicechunktosize(revlog, [1, 3], 1))
208 [[1], [3]]
208 [[1], [3]]
209 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
209 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
210 [[3], [5]]
210 [[3], [5]]
211
211
212 == No Snapshot cases ==
212 == No Snapshot cases ==
213 >>> revlog = _testrevlog(data)
213 >>> revlog = _testrevlog(data)
214
214
215 Cases where chunk is already small enough
215 Cases where chunk is already small enough
216 >>> list(_slicechunktosize(revlog, [0], 3))
216 >>> list(_slicechunktosize(revlog, [0], 3))
217 [[0]]
217 [[0]]
218 >>> list(_slicechunktosize(revlog, [6, 7], 3))
218 >>> list(_slicechunktosize(revlog, [6, 7], 3))
219 [[6, 7]]
219 [[6, 7]]
220 >>> list(_slicechunktosize(revlog, [0], None))
220 >>> list(_slicechunktosize(revlog, [0], None))
221 [[0]]
221 [[0]]
222 >>> list(_slicechunktosize(revlog, [6, 7], None))
222 >>> list(_slicechunktosize(revlog, [6, 7], None))
223 [[6, 7]]
223 [[6, 7]]
224
224
225 cases where we need actual slicing
225 cases where we need actual slicing
226 >>> list(_slicechunktosize(revlog, [0, 1], 3))
226 >>> list(_slicechunktosize(revlog, [0, 1], 3))
227 [[0], [1]]
227 [[0], [1]]
228 >>> list(_slicechunktosize(revlog, [1, 3], 3))
228 >>> list(_slicechunktosize(revlog, [1, 3], 3))
229 [[1], [3]]
229 [[1], [3]]
230 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
230 >>> list(_slicechunktosize(revlog, [1, 2, 3], 3))
231 [[1], [2, 3]]
231 [[1], [2, 3]]
232 >>> list(_slicechunktosize(revlog, [3, 5], 3))
232 >>> list(_slicechunktosize(revlog, [3, 5], 3))
233 [[3], [5]]
233 [[3], [5]]
234 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
234 >>> list(_slicechunktosize(revlog, [3, 4, 5], 3))
235 [[3], [4, 5]]
235 [[3], [4, 5]]
236 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
236 >>> list(_slicechunktosize(revlog, [5, 6, 7, 8], 3))
237 [[5], [6, 7, 8]]
237 [[5], [6, 7, 8]]
238 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
238 >>> list(_slicechunktosize(revlog, [0, 1, 2, 3, 4, 5, 6, 7, 8], 3))
239 [[0], [1, 2], [3], [5], [6, 7, 8]]
239 [[0], [1, 2], [3], [5], [6, 7, 8]]
240
240
241 Case with too large individual chunk (must return valid chunk)
241 Case with too large individual chunk (must return valid chunk)
242 >>> list(_slicechunktosize(revlog, [0, 1], 2))
242 >>> list(_slicechunktosize(revlog, [0, 1], 2))
243 [[0], [1]]
243 [[0], [1]]
244 >>> list(_slicechunktosize(revlog, [1, 3], 1))
244 >>> list(_slicechunktosize(revlog, [1, 3], 1))
245 [[1], [3]]
245 [[1], [3]]
246 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
246 >>> list(_slicechunktosize(revlog, [3, 4, 5], 2))
247 [[3], [5]]
247 [[3], [5]]
248
248
249 == mixed case ==
249 == mixed case ==
250 >>> revlog = _testrevlog(data, snapshot=[0, 1, 2])
250 >>> revlog = _testrevlog(data, snapshot=[0, 1, 2])
251 >>> list(_slicechunktosize(revlog, list(range(9)), 5))
251 >>> list(_slicechunktosize(revlog, list(range(9)), 5))
252 [[0, 1], [2], [3, 4, 5], [6, 7, 8]]
252 [[0, 1], [2], [3, 4, 5], [6, 7, 8]]
253 """
253 """
254 assert targetsize is None or 0 <= targetsize
254 assert targetsize is None or 0 <= targetsize
255 startdata = revlog.start(revs[0])
255 startdata = revlog.start(revs[0])
256 enddata = revlog.end(revs[-1])
256 enddata = revlog.end(revs[-1])
257 fullspan = enddata - startdata
257 fullspan = enddata - startdata
258 if targetsize is None or fullspan <= targetsize:
258 if targetsize is None or fullspan <= targetsize:
259 yield revs
259 yield revs
260 return
260 return
261
261
262 startrevidx = 0
262 startrevidx = 0
263 endrevidx = 1
263 endrevidx = 1
264 iterrevs = enumerate(revs)
264 iterrevs = enumerate(revs)
265 next(iterrevs) # skip first rev.
265 next(iterrevs) # skip first rev.
266 # first step: get snapshots out of the way
266 # first step: get snapshots out of the way
267 for idx, r in iterrevs:
267 for idx, r in iterrevs:
268 span = revlog.end(r) - startdata
268 span = revlog.end(r) - startdata
269 snapshot = revlog.issnapshot(r)
269 snapshot = revlog.issnapshot(r)
270 if span <= targetsize and snapshot:
270 if span <= targetsize and snapshot:
271 endrevidx = idx + 1
271 endrevidx = idx + 1
272 else:
272 else:
273 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
273 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
274 if chunk:
274 if chunk:
275 yield chunk
275 yield chunk
276 startrevidx = idx
276 startrevidx = idx
277 startdata = revlog.start(r)
277 startdata = revlog.start(r)
278 endrevidx = idx + 1
278 endrevidx = idx + 1
279 if not snapshot:
279 if not snapshot:
280 break
280 break
281
281
282 # for the others, we use binary slicing to quickly converge toward valid
282 # for the others, we use binary slicing to quickly converge toward valid
283 # chunks (otherwise, we might end up looking for start/end of many
283 # chunks (otherwise, we might end up looking for start/end of many
284 # revisions). This logic is not looking for the perfect slicing point, it
284 # revisions). This logic is not looking for the perfect slicing point, it
285 # focuses on quickly converging toward valid chunks.
285 # focuses on quickly converging toward valid chunks.
286 nbitem = len(revs)
286 nbitem = len(revs)
287 while (enddata - startdata) > targetsize:
287 while (enddata - startdata) > targetsize:
288 endrevidx = nbitem
288 endrevidx = nbitem
289 if nbitem - startrevidx <= 1:
289 if nbitem - startrevidx <= 1:
290 break # protect against individual chunk larger than limit
290 break # protect against individual chunk larger than limit
291 localenddata = revlog.end(revs[endrevidx - 1])
291 localenddata = revlog.end(revs[endrevidx - 1])
292 span = localenddata - startdata
292 span = localenddata - startdata
293 while span > targetsize:
293 while span > targetsize:
294 if endrevidx - startrevidx <= 1:
294 if endrevidx - startrevidx <= 1:
295 break # protect against individual chunk larger than limit
295 break # protect against individual chunk larger than limit
296 endrevidx -= (endrevidx - startrevidx) // 2
296 endrevidx -= (endrevidx - startrevidx) // 2
297 localenddata = revlog.end(revs[endrevidx - 1])
297 localenddata = revlog.end(revs[endrevidx - 1])
298 span = localenddata - startdata
298 span = localenddata - startdata
299 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
299 chunk = _trimchunk(revlog, revs, startrevidx, endrevidx)
300 if chunk:
300 if chunk:
301 yield chunk
301 yield chunk
302 startrevidx = endrevidx
302 startrevidx = endrevidx
303 startdata = revlog.start(revs[startrevidx])
303 startdata = revlog.start(revs[startrevidx])
304
304
305 chunk = _trimchunk(revlog, revs, startrevidx)
305 chunk = _trimchunk(revlog, revs, startrevidx)
306 if chunk:
306 if chunk:
307 yield chunk
307 yield chunk
308
308
309
309
310 def _slicechunktodensity(revlog, revs, targetdensity=0.5, mingapsize=0):
310 def _slicechunktodensity(revlog, revs, targetdensity=0.5, mingapsize=0):
311 """slice revs to reduce the amount of unrelated data to be read from disk.
311 """slice revs to reduce the amount of unrelated data to be read from disk.
312
312
313 ``revs`` is sliced into groups that should be read in one time.
313 ``revs`` is sliced into groups that should be read in one time.
314 Assume that revs are sorted.
314 Assume that revs are sorted.
315
315
316 The initial chunk is sliced until the overall density (payload/chunks-span
316 The initial chunk is sliced until the overall density (payload/chunks-span
317 ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
317 ratio) is above `targetdensity`. No gap smaller than `mingapsize` is
318 skipped.
318 skipped.
319
319
320 >>> revlog = _testrevlog([
320 >>> revlog = _testrevlog([
321 ... 5, #00 (5)
321 ... 5, #00 (5)
322 ... 10, #01 (5)
322 ... 10, #01 (5)
323 ... 12, #02 (2)
323 ... 12, #02 (2)
324 ... 12, #03 (empty)
324 ... 12, #03 (empty)
325 ... 27, #04 (15)
325 ... 27, #04 (15)
326 ... 31, #05 (4)
326 ... 31, #05 (4)
327 ... 31, #06 (empty)
327 ... 31, #06 (empty)
328 ... 42, #07 (11)
328 ... 42, #07 (11)
329 ... 47, #08 (5)
329 ... 47, #08 (5)
330 ... 47, #09 (empty)
330 ... 47, #09 (empty)
331 ... 48, #10 (1)
331 ... 48, #10 (1)
332 ... 51, #11 (3)
332 ... 51, #11 (3)
333 ... 74, #12 (23)
333 ... 74, #12 (23)
334 ... 85, #13 (11)
334 ... 85, #13 (11)
335 ... 86, #14 (1)
335 ... 86, #14 (1)
336 ... 91, #15 (5)
336 ... 91, #15 (5)
337 ... ])
337 ... ])
338
338
339 >>> list(_slicechunktodensity(revlog, list(range(16))))
339 >>> list(_slicechunktodensity(revlog, list(range(16))))
340 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
340 [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]]
341 >>> list(_slicechunktodensity(revlog, [0, 15]))
341 >>> list(_slicechunktodensity(revlog, [0, 15]))
342 [[0], [15]]
342 [[0], [15]]
343 >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
343 >>> list(_slicechunktodensity(revlog, [0, 11, 15]))
344 [[0], [11], [15]]
344 [[0], [11], [15]]
345 >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
345 >>> list(_slicechunktodensity(revlog, [0, 11, 13, 15]))
346 [[0], [11, 13, 15]]
346 [[0], [11, 13, 15]]
347 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
347 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14]))
348 [[1, 2], [5, 8, 10, 11], [14]]
348 [[1, 2], [5, 8, 10, 11], [14]]
349 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
349 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
350 ... mingapsize=20))
350 ... mingapsize=20))
351 [[1, 2, 3, 5, 8, 10, 11], [14]]
351 [[1, 2, 3, 5, 8, 10, 11], [14]]
352 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
352 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
353 ... targetdensity=0.95))
353 ... targetdensity=0.95))
354 [[1, 2], [5], [8, 10, 11], [14]]
354 [[1, 2], [5], [8, 10, 11], [14]]
355 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
355 >>> list(_slicechunktodensity(revlog, [1, 2, 3, 5, 8, 10, 11, 14],
356 ... targetdensity=0.95, mingapsize=12))
356 ... targetdensity=0.95, mingapsize=12))
357 [[1, 2], [5, 8, 10, 11], [14]]
357 [[1, 2], [5, 8, 10, 11], [14]]
358 """
358 """
359 start = revlog.start
359 start = revlog.start
360 length = revlog.length
360 length = revlog.length
361
361
362 if len(revs) <= 1:
362 if len(revs) <= 1:
363 yield revs
363 yield revs
364 return
364 return
365
365
366 deltachainspan = segmentspan(revlog, revs)
366 deltachainspan = segmentspan(revlog, revs)
367
367
368 if deltachainspan < mingapsize:
368 if deltachainspan < mingapsize:
369 yield revs
369 yield revs
370 return
370 return
371
371
372 readdata = deltachainspan
372 readdata = deltachainspan
373 chainpayload = sum(length(r) for r in revs)
373 chainpayload = sum(length(r) for r in revs)
374
374
375 if deltachainspan:
375 if deltachainspan:
376 density = chainpayload / float(deltachainspan)
376 density = chainpayload / float(deltachainspan)
377 else:
377 else:
378 density = 1.0
378 density = 1.0
379
379
380 if density >= targetdensity:
380 if density >= targetdensity:
381 yield revs
381 yield revs
382 return
382 return
383
383
384 # Store the gaps in a heap to have them sorted by decreasing size
384 # Store the gaps in a heap to have them sorted by decreasing size
385 gaps = []
385 gaps = []
386 prevend = None
386 prevend = None
387 for i, rev in enumerate(revs):
387 for i, rev in enumerate(revs):
388 revstart = start(rev)
388 revstart = start(rev)
389 revlen = length(rev)
389 revlen = length(rev)
390
390
391 # Skip empty revisions to form larger holes
391 # Skip empty revisions to form larger holes
392 if revlen == 0:
392 if revlen == 0:
393 continue
393 continue
394
394
395 if prevend is not None:
395 if prevend is not None:
396 gapsize = revstart - prevend
396 gapsize = revstart - prevend
397 # only consider holes that are large enough
397 # only consider holes that are large enough
398 if gapsize > mingapsize:
398 if gapsize > mingapsize:
399 gaps.append((gapsize, i))
399 gaps.append((gapsize, i))
400
400
401 prevend = revstart + revlen
401 prevend = revstart + revlen
402 # sort the gaps to pop them from largest to small
402 # sort the gaps to pop them from largest to small
403 gaps.sort()
403 gaps.sort()
404
404
405 # Collect the indices of the largest holes until the density is acceptable
405 # Collect the indices of the largest holes until the density is acceptable
406 selected = []
406 selected = []
407 while gaps and density < targetdensity:
407 while gaps and density < targetdensity:
408 gapsize, gapidx = gaps.pop()
408 gapsize, gapidx = gaps.pop()
409
409
410 selected.append(gapidx)
410 selected.append(gapidx)
411
411
412 # the gap sizes are stored as negatives to be sorted decreasingly
412 # the gap sizes are stored as negatives to be sorted decreasingly
413 # by the heap
413 # by the heap
414 readdata -= gapsize
414 readdata -= gapsize
415 if readdata > 0:
415 if readdata > 0:
416 density = chainpayload / float(readdata)
416 density = chainpayload / float(readdata)
417 else:
417 else:
418 density = 1.0
418 density = 1.0
419 selected.sort()
419 selected.sort()
420
420
421 # Cut the revs at collected indices
421 # Cut the revs at collected indices
422 previdx = 0
422 previdx = 0
423 for idx in selected:
423 for idx in selected:
424
424
425 chunk = _trimchunk(revlog, revs, previdx, idx)
425 chunk = _trimchunk(revlog, revs, previdx, idx)
426 if chunk:
426 if chunk:
427 yield chunk
427 yield chunk
428
428
429 previdx = idx
429 previdx = idx
430
430
431 chunk = _trimchunk(revlog, revs, previdx)
431 chunk = _trimchunk(revlog, revs, previdx)
432 if chunk:
432 if chunk:
433 yield chunk
433 yield chunk
434
434
435
435
436 def _trimchunk(revlog, revs, startidx, endidx=None):
436 def _trimchunk(revlog, revs, startidx, endidx=None):
437 """returns revs[startidx:endidx] without empty trailing revs
437 """returns revs[startidx:endidx] without empty trailing revs
438
438
439 Doctest Setup
439 Doctest Setup
440 >>> revlog = _testrevlog([
440 >>> revlog = _testrevlog([
441 ... 5, #0
441 ... 5, #0
442 ... 10, #1
442 ... 10, #1
443 ... 12, #2
443 ... 12, #2
444 ... 12, #3 (empty)
444 ... 12, #3 (empty)
445 ... 17, #4
445 ... 17, #4
446 ... 21, #5
446 ... 21, #5
447 ... 21, #6 (empty)
447 ... 21, #6 (empty)
448 ... ])
448 ... ])
449
449
450 Contiguous cases:
450 Contiguous cases:
451 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
451 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0)
452 [0, 1, 2, 3, 4, 5]
452 [0, 1, 2, 3, 4, 5]
453 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
453 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 5)
454 [0, 1, 2, 3, 4]
454 [0, 1, 2, 3, 4]
455 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
455 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 0, 4)
456 [0, 1, 2]
456 [0, 1, 2]
457 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
457 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 2, 4)
458 [2]
458 [2]
459 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
459 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3)
460 [3, 4, 5]
460 [3, 4, 5]
461 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
461 >>> _trimchunk(revlog, [0, 1, 2, 3, 4, 5, 6], 3, 5)
462 [3, 4]
462 [3, 4]
463
463
464 Discontiguous cases:
464 Discontiguous cases:
465 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
465 >>> _trimchunk(revlog, [1, 3, 5, 6], 0)
466 [1, 3, 5]
466 [1, 3, 5]
467 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
467 >>> _trimchunk(revlog, [1, 3, 5, 6], 0, 2)
468 [1]
468 [1]
469 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
469 >>> _trimchunk(revlog, [1, 3, 5, 6], 1, 3)
470 [3, 5]
470 [3, 5]
471 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
471 >>> _trimchunk(revlog, [1, 3, 5, 6], 1)
472 [3, 5]
472 [3, 5]
473 """
473 """
474 length = revlog.length
474 length = revlog.length
475
475
476 if endidx is None:
476 if endidx is None:
477 endidx = len(revs)
477 endidx = len(revs)
478
478
479 # If we have a non-emtpy delta candidate, there are nothing to trim
479 # If we have a non-emtpy delta candidate, there are nothing to trim
480 if revs[endidx - 1] < len(revlog):
480 if revs[endidx - 1] < len(revlog):
481 # Trim empty revs at the end, except the very first revision of a chain
481 # Trim empty revs at the end, except the very first revision of a chain
482 while (
482 while (
483 endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0
483 endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0
484 ):
484 ):
485 endidx -= 1
485 endidx -= 1
486
486
487 return revs[startidx:endidx]
487 return revs[startidx:endidx]
488
488
489
489
def segmentspan(revlog, revs):
    """Return the number of on-disk bytes spanned by a segment of revisions.

    ``revs`` is a sorted array of revision numbers.  The span runs from the
    start offset of the first revision to the end offset of the last one,
    gaps included.  An empty segment spans zero bytes.

    >>> revlog = _testrevlog([
    ...  5, #0
    ...  10, #1
    ...  12, #2
    ...  12, #3 (empty)
    ...  17, #4
    ... ])

    >>> segmentspan(revlog, [0, 1, 2, 3, 4])
    17
    >>> segmentspan(revlog, [0, 4])
    17
    >>> segmentspan(revlog, [3, 4])
    5
    >>> segmentspan(revlog, [1, 2, 3,])
    7
    >>> segmentspan(revlog, [1, 3])
    7
    """
    if revs:
        return revlog.end(revs[-1]) - revlog.start(revs[0])
    return 0
518
518
519
519
def _textfromdelta(fh, revlog, baserev, delta, p1, p2, flags, expectednode):
    """build full text from a (base, delta) pair and other metadata

    ``fh`` is an open file handle forwarded to ``revlog.revision`` (as
    ``_df``) when the base revision has to be read.  ``delta`` is the
    binary delta against revision ``baserev``; ``p1``, ``p2`` and
    ``expectednode`` are used to validate the hash of the rebuilt text,
    ``flags`` are the revision's storage flags.

    Raises ``error.StorageError`` when ``flags`` marks the revision as
    censored but the reconstructed content is not recognized as such.
    """
    # special case deltas which replace entire base; no need to decode
    # base revision. this neatly avoids censored bases, which throw when
    # they're decoded.
    hlen = struct.calcsize(b">lll")
    if delta[:hlen] == mdiff.replacediffheader(
        revlog.rawsize(baserev), len(delta) - hlen
    ):
        # full-replacement delta: everything after the header already is
        # the full text
        fulltext = delta[hlen:]
    else:
        # deltabase is rawtext before changed by flag processors, which is
        # equivalent to non-raw text
        basetext = revlog.revision(baserev, _df=fh)
        fulltext = mdiff.patch(basetext, delta)

    try:
        validatehash = flagutil.processflagsraw(revlog, fulltext, flags)
        if validatehash:
            revlog.checkhash(fulltext, expectednode, p1=p1, p2=p2)
        if flags & REVIDX_ISCENSORED:
            raise error.StorageError(
                _(b'node %s is not censored') % expectednode
            )
    except error.CensoredNodeError:
        # must pass the censored index flag to add censored revisions
        if not flags & REVIDX_ISCENSORED:
            raise
    return fulltext
549
549
550
550
@attr.s(slots=True, frozen=True)
class _deltainfo:
    # Immutable record describing one candidate delta for a revision.
    #
    # distance from the base revision (bounded by the span checks in
    # `is_good_delta_info`)
    distance = attr.ib()
    # size of this delta alone, compared against the full text length
    deltalen = attr.ib()
    # (header, payload) pair of compressed delta data; a b'u' header means
    # "stored uncompressed" (see `drop_u_compression`)
    data = attr.ib()
    # revision number of the delta base
    base = attr.ib()
    # base revision of the full delta chain — NOTE(review): not consulted in
    # the checks visible here; confirm semantics against the revlog code
    chainbase = attr.ib()
    # number of deltas in the chain (checked against `_maxchainlen`)
    chainlen = attr.ib()
    # cumulated compressed size of the deltas to apply
    compresseddeltalen = attr.ib()
    # snapshot depth when the revision is an intermediate snapshot,
    # None otherwise
    snapshotdepth = attr.ib()
561
561
562
562
def drop_u_compression(delta):
    """turn into a "u" (no-compression) into no-compression without header

    This is useful for revlog format that has better compression method.
    """
    header, payload = delta.data
    assert header == b'u', header
    # Rebuild the (frozen) info record: drop the one-byte b'u' marker from
    # the payload header and shrink the recorded delta length accordingly.
    return _deltainfo(
        delta.distance,
        delta.deltalen - 1,
        (b'', payload),
        delta.base,
        delta.chainbase,
        delta.chainlen,
        delta.compresseddeltalen,
        delta.snapshotdepth,
    )
579
579
580
580
def is_good_delta_info(revlog, deltainfo, revinfo):
    """Returns True if the given delta is good. Good means that it is within
    the disk span, disk size, and chain length bounds that we know to be
    performant.

    ``deltainfo`` is a ``_deltainfo`` candidate (or None, which is never
    good); ``revinfo`` carries the target revision's ``textlen`` and an
    optional ``cachedelta`` hint."""
    if deltainfo is None:
        return False

    # A delta base explicitly forced by the cached delta is always accepted.
    if (
        revinfo.cachedelta is not None
        and deltainfo.base == revinfo.cachedelta[0]
        and revinfo.cachedelta[2] == DELTA_BASE_REUSE_FORCE
    ):
        return True

    # - 'deltainfo.distance' is the distance from the base revision --
    #   bounding it limits the amount of I/O we need to do.
    # - 'deltainfo.compresseddeltalen' is the sum of the total size of
    #   deltas we need to apply -- bounding it limits the amount of CPU
    #   we consume.

    textlen = revinfo.textlen
    defaultmax = textlen * 4
    maxdist = revlog._maxdeltachainspan
    if not maxdist:
        maxdist = deltainfo.distance  # ensure the conditional pass
    maxdist = max(maxdist, defaultmax)

    # Bad delta from read span:
    #
    # If the span of data read is larger than the maximum allowed.
    #
    # In the sparse-revlog case, we rely on the associated "sparse reading"
    # to avoid issue related to the span of data. In theory, it would be
    # possible to build pathological revlog where delta pattern would lead
    # to too many reads. However, they do not happen in practice at all. So
    # we skip the span check entirely.
    if not revlog._sparserevlog and maxdist < deltainfo.distance:
        return False

    # Bad delta from new delta size:
    #
    # If the delta size is larger than the target text, storing the
    # delta will be inefficient.
    if textlen < deltainfo.deltalen:
        return False

    # Bad delta from cumulated payload size:
    #
    # If the sum of delta get larger than K * target text length.
    if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen:
        return False

    # Bad delta from chain length:
    #
    # If the number of delta in the chain gets too high.
    if revlog._maxchainlen and revlog._maxchainlen < deltainfo.chainlen:
        return False

    # bad delta from intermediate snapshot size limit
    #
    # If an intermediate snapshot size is higher than the limit. The
    # limit exist to prevent endless chain of intermediate delta to be
    # created.
    if (
        deltainfo.snapshotdepth is not None
        and (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen
    ):
        return False

    # bad delta if new intermediate snapshot is larger than the previous
    # snapshot
    if (
        deltainfo.snapshotdepth
        and revlog.length(deltainfo.base) < deltainfo.deltalen
    ):
        return False

    return True
659
659
660
660
# If a revision's full text is that much bigger than a base candidate full
# text's, it is very unlikely that it will produce a valid delta. We no longer
# consider these candidates.
LIMIT_BASE2TEXT = 500
665
665
666
666
def _candidategroups(
    revlog,
    textlen,
    p1,
    p2,
    cachedelta,
    excluded_bases=None,
    target_rev=None,
    snapshot_cache=None,
):
    """Provides group of revision to be tested as delta base

    This top level function focus on emitting groups with unique and worthwhile
    content. See _raw_candidate_groups for details about the group order.

    This is a generator driven through ``send()``: each yielded tuple of
    candidate base revisions must be answered with the revision that gave a
    good delta (or None), which steers further refinement and the chunked
    emission below.  A final None is yielded once no candidate remains.
    """
    # should we try to build a delta?
    if not (len(revlog) and revlog._storedeltachains):
        yield None
        return

    if target_rev is None:
        target_rev = len(revlog)

    if not revlog._generaldelta:
        # before general delta, there is only one possible delta base
        yield (target_rev - 1,)
        yield None
        return

    if (
        cachedelta is not None
        and nullrev == cachedelta[0]
        and cachedelta[2] == DELTA_BASE_REUSE_FORCE
    ):
        # instruction are to forcibly do a full snapshot
        yield None
        return

    deltalength = revlog.length
    deltaparent = revlog.deltaparent
    sparse = revlog._sparserevlog
    good = None

    deltas_limit = textlen * LIMIT_DELTA2TEXT
    group_chunk_size = revlog._candidate_group_chunk_size

    # `tested` accumulates every revision already considered (or rejected)
    # so the same base is never offered twice.
    tested = {nullrev}
    candidates = _refinedgroups(
        revlog,
        p1,
        p2,
        cachedelta,
        snapshot_cache=snapshot_cache,
    )
    while True:
        temptative = candidates.send(good)
        if temptative is None:
            break
        group = []
        for rev in temptative:
            # skip over empty delta (no need to include them in a chain)
            while revlog._generaldelta and not (
                rev == nullrev or rev in tested or deltalength(rev)
            ):
                tested.add(rev)
                rev = deltaparent(rev)
            # no need to try a delta against nullrev, this will be done as a
            # last resort.
            if rev == nullrev:
                continue
            # filter out revision we tested already
            if rev in tested:
                continue

            if (
                cachedelta is not None
                and rev == cachedelta[0]
                and cachedelta[2] == DELTA_BASE_REUSE_FORCE
            ):
                # instructions are to forcibly consider/use this delta base
                group.append(rev)
                continue

            # an higher authority deamed the base unworthy (e.g. censored)
            if excluded_bases is not None and rev in excluded_bases:
                tested.add(rev)
                continue
            # We are in some recomputation cases and that rev is too high in
            # the revlog
            if target_rev is not None and rev >= target_rev:
                tested.add(rev)
                continue
            # filter out delta base that will never produce good delta
            if deltas_limit < revlog.length(rev):
                tested.add(rev)
                continue
            if sparse and revlog.rawsize(rev) < (textlen // LIMIT_BASE2TEXT):
                tested.add(rev)
                continue
            # no delta for rawtext-changing revs (see "candelta" for why)
            if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS:
                tested.add(rev)
                continue

            # If we reach here, we are about to build and test a delta.
            # The delta building process will compute the chaininfo in all
            # case, since that computation is cached, it is fine to access it
            # here too.
            chainlen, chainsize = revlog._chaininfo(rev)
            # if chain will be too long, skip base
            if revlog._maxchainlen and chainlen >= revlog._maxchainlen:
                tested.add(rev)
                continue
            # if chain already have too much data, skip base
            if deltas_limit < chainsize:
                tested.add(rev)
                continue
            if sparse and revlog.upperboundcomp is not None:
                maxcomp = revlog.upperboundcomp
                basenotsnap = (p1, p2, nullrev)
                if rev not in basenotsnap and revlog.issnapshot(rev):
                    snapshotdepth = revlog.snapshotdepth(rev)
                    # If text is significantly larger than the base, we can
                    # expect the resulting delta to be proportional to the size
                    # difference
                    revsize = revlog.rawsize(rev)
                    rawsizedistance = max(textlen - revsize, 0)
                    # use an estimate of the compression upper bound.
                    lowestrealisticdeltalen = rawsizedistance // maxcomp

                    # check the absolute constraint on the delta size
                    snapshotlimit = textlen >> snapshotdepth
                    if snapshotlimit < lowestrealisticdeltalen:
                        # delta lower bound is larger than accepted upper bound
                        tested.add(rev)
                        continue

                    # check the relative constraint on the delta size
                    revlength = revlog.length(rev)
                    if revlength < lowestrealisticdeltalen:
                        # delta probable lower bound is larger than target base
                        tested.add(rev)
                        continue

            group.append(rev)
        if group:
            # When the size of the candidate group is big, it can result in a
            # quite significant performance impact. To reduce this, we can send
            # them in smaller batches until the new batch does not provide any
            # improvements.
            #
            # This might reduce the overall efficiency of the compression in
            # some corner cases, but that should also prevent very pathological
            # cases from being an issue. (eg. 20 000 candidates).
            #
            # XXX note that the ordering of the group becomes important as it
            # now impacts the final result. The current order is unprocessed
            # and can be improved.
            if group_chunk_size == 0:
                tested.update(group)
                good = yield tuple(group)
            else:
                prev_good = good
                for start in range(0, len(group), group_chunk_size):
                    sub_group = group[start : start + group_chunk_size]
                    tested.update(sub_group)
                    good = yield tuple(sub_group)
                    if prev_good == good:
                        break

    yield None
832
838
833
839
def _refinedgroups(revlog, p1, p2, cachedelta, snapshot_cache=None):
    """Yield groups of candidate delta bases, refined from caller feedback.

    Generator driven through ``send()``: the value sent back after each
    yield is the revision accepted as a good base (or None).  The cached
    delta base is offered first (when reuse is allowed), then the raw
    groups from ``_rawgroups``; on sparse revlogs an accepted snapshot
    base is further refined by walking its chain down, then its snapshot
    children up.  Terminates by yielding None.
    """
    good = None
    # First we try to reuse a the delta contained in the bundle.
    # (or from the source revlog)
    #
    # This logic only applies to general delta repositories and can be disabled
    # through configuration. Disabling reuse source delta is useful when
    # we want to make sure we recomputed "optimal" deltas.
    debug_info = None
    if cachedelta is not None and cachedelta[2] > DELTA_BASE_REUSE_NO:
        # Assume what we received from the server is a good choice
        # build delta will reuse the cache
        if debug_info is not None:
            debug_info['cached-delta.tested'] += 1
        good = yield (cachedelta[0],)
        if good is not None:
            if debug_info is not None:
                debug_info['cached-delta.accepted'] += 1
            yield None
            return
    if snapshot_cache is None:
        snapshot_cache = SnapshotCache()
    groups = _rawgroups(
        revlog,
        p1,
        p2,
        cachedelta,
        snapshot_cache,
    )
    for candidates in groups:
        good = yield candidates
        if good is not None:
            break

    # If sparse revlog is enabled, we can try to refine the available deltas
    if not revlog._sparserevlog:
        yield None
        return

    # if we have a refinable value, try to refine it
    if good is not None and good not in (p1, p2) and revlog.issnapshot(good):
        # refine snapshot down
        previous = None
        while previous != good:
            previous = good
            base = revlog.deltaparent(good)
            if base == nullrev:
                break
            good = yield (base,)
        # refine snapshot up
        if not snapshot_cache.snapshots:
            snapshot_cache.update(revlog, good + 1)
        previous = None
        while good != previous:
            previous = good
            children = tuple(sorted(c for c in snapshot_cache.snapshots[good]))
            good = yield children

    if debug_info is not None:
        if good is None:
            debug_info['no-solution'] += 1

    yield None
897
903
898
904
899 def _rawgroups(revlog, p1, p2, cachedelta, snapshot_cache=None):
905 def _rawgroups(revlog, p1, p2, cachedelta, snapshot_cache=None):
900 """Provides group of revision to be tested as delta base
906 """Provides group of revision to be tested as delta base
901
907
902 This lower level function focus on emitting delta theorically interresting
908 This lower level function focus on emitting delta theorically interresting
903 without looking it any practical details.
909 without looking it any practical details.
904
910
905 The group order aims at providing fast or small candidates first.
911 The group order aims at providing fast or small candidates first.
906 """
912 """
907 gdelta = revlog._generaldelta
913 gdelta = revlog._generaldelta
908 # gate sparse behind general-delta because of issue6056
914 # gate sparse behind general-delta because of issue6056
909 sparse = gdelta and revlog._sparserevlog
915 sparse = gdelta and revlog._sparserevlog
910 curr = len(revlog)
916 curr = len(revlog)
911 prev = curr - 1
917 prev = curr - 1
912 deltachain = lambda rev: revlog._deltachain(rev)[0]
918 deltachain = lambda rev: revlog._deltachain(rev)[0]
913
919
914 if gdelta:
920 if gdelta:
915 # exclude already lazy tested base if any
921 # exclude already lazy tested base if any
916 parents = [p for p in (p1, p2) if p != nullrev]
922 parents = [p for p in (p1, p2) if p != nullrev]
917
923
918 if not revlog._deltabothparents and len(parents) == 2:
924 if not revlog._deltabothparents and len(parents) == 2:
919 parents.sort()
925 parents.sort()
920 # To minimize the chance of having to build a fulltext,
926 # To minimize the chance of having to build a fulltext,
921 # pick first whichever parent is closest to us (max rev)
927 # pick first whichever parent is closest to us (max rev)
922 yield (parents[1],)
928 yield (parents[1],)
923 # then the other one (min rev) if the first did not fit
929 # then the other one (min rev) if the first did not fit
924 yield (parents[0],)
930 yield (parents[0],)
925 elif len(parents) > 0:
931 elif len(parents) > 0:
926 # Test all parents (1 or 2), and keep the best candidate
932 # Test all parents (1 or 2), and keep the best candidate
927 yield parents
933 yield parents
928
934
929 if sparse and parents:
935 if sparse and parents:
930 if snapshot_cache is None:
936 if snapshot_cache is None:
931 # map: base-rev: [snapshot-revs]
937 # map: base-rev: [snapshot-revs]
932 snapshot_cache = SnapshotCache()
938 snapshot_cache = SnapshotCache()
933 # See if we can use an existing snapshot in the parent chains to use as
939 # See if we can use an existing snapshot in the parent chains to use as
934 # a base for a new intermediate-snapshot
940 # a base for a new intermediate-snapshot
935 #
941 #
936 # search for snapshot in parents delta chain
942 # search for snapshot in parents delta chain
937 # map: snapshot-level: snapshot-rev
943 # map: snapshot-level: snapshot-rev
938 parents_snaps = collections.defaultdict(set)
944 parents_snaps = collections.defaultdict(set)
939 candidate_chains = [deltachain(p) for p in parents]
945 candidate_chains = [deltachain(p) for p in parents]
940 for chain in candidate_chains:
946 for chain in candidate_chains:
941 for idx, s in enumerate(chain):
947 for idx, s in enumerate(chain):
942 if not revlog.issnapshot(s):
948 if not revlog.issnapshot(s):
943 break
949 break
944 parents_snaps[idx].add(s)
950 parents_snaps[idx].add(s)
945 snapfloor = min(parents_snaps[0]) + 1
951 snapfloor = min(parents_snaps[0]) + 1
946 snapshot_cache.update(revlog, snapfloor)
952 snapshot_cache.update(revlog, snapfloor)
947 # search for the highest "unrelated" revision
953 # search for the highest "unrelated" revision
948 #
954 #
949 # Adding snapshots used by "unrelated" revision increase the odd we
955 # Adding snapshots used by "unrelated" revision increase the odd we
950 # reuse an independant, yet better snapshot chain.
956 # reuse an independant, yet better snapshot chain.
951 #
957 #
952 # XXX instead of building a set of revisions, we could lazily enumerate
958 # XXX instead of building a set of revisions, we could lazily enumerate
953 # over the chains. That would be more efficient, however we stick to
959 # over the chains. That would be more efficient, however we stick to
954 # simple code for now.
960 # simple code for now.
955 all_revs = set()
961 all_revs = set()
956 for chain in candidate_chains:
962 for chain in candidate_chains:
957 all_revs.update(chain)
963 all_revs.update(chain)
958 other = None
964 other = None
959 for r in revlog.revs(prev, snapfloor):
965 for r in revlog.revs(prev, snapfloor):
960 if r not in all_revs:
966 if r not in all_revs:
961 other = r
967 other = r
962 break
968 break
963 if other is not None:
969 if other is not None:
964 # To avoid unfair competition, we won't use unrelated intermediate
970 # To avoid unfair competition, we won't use unrelated intermediate
965 # snapshot that are deeper than the ones from the parent delta
971 # snapshot that are deeper than the ones from the parent delta
966 # chain.
972 # chain.
967 max_depth = max(parents_snaps.keys())
973 max_depth = max(parents_snaps.keys())
968 chain = deltachain(other)
974 chain = deltachain(other)
969 for depth, s in enumerate(chain):
975 for depth, s in enumerate(chain):
970 if s < snapfloor:
976 if s < snapfloor:
971 continue
977 continue
972 if max_depth < depth:
978 if max_depth < depth:
973 break
979 break
974 if not revlog.issnapshot(s):
980 if not revlog.issnapshot(s):
975 break
981 break
976 parents_snaps[depth].add(s)
982 parents_snaps[depth].add(s)
977 # Test them as possible intermediate snapshot base
983 # Test them as possible intermediate snapshot base
978 # We test them from highest to lowest level. High level one are more
984 # We test them from highest to lowest level. High level one are more
979 # likely to result in small delta
985 # likely to result in small delta
980 floor = None
986 floor = None
981 for idx, snaps in sorted(parents_snaps.items(), reverse=True):
987 for idx, snaps in sorted(parents_snaps.items(), reverse=True):
982 siblings = set()
988 siblings = set()
983 for s in snaps:
989 for s in snaps:
984 siblings.update(snapshot_cache.snapshots[s])
990 siblings.update(snapshot_cache.snapshots[s])
985 # Before considering making a new intermediate snapshot, we check
991 # Before considering making a new intermediate snapshot, we check
986 # if an existing snapshot, children of base we consider, would be
992 # if an existing snapshot, children of base we consider, would be
987 # suitable.
993 # suitable.
988 #
994 #
989 # It give a change to reuse a delta chain "unrelated" to the
995 # It give a change to reuse a delta chain "unrelated" to the
990 # current revision instead of starting our own. Without such
996 # current revision instead of starting our own. Without such
991 # re-use, topological branches would keep reopening new chains.
997 # re-use, topological branches would keep reopening new chains.
992 # Creating more and more snapshot as the repository grow.
998 # Creating more and more snapshot as the repository grow.
993
999
994 if floor is not None:
1000 if floor is not None:
995 # We only do this for siblings created after the one in our
1001 # We only do this for siblings created after the one in our
996 # parent's delta chain. Those created before has less chances
1002 # parent's delta chain. Those created before has less chances
997 # to be valid base since our ancestors had to create a new
1003 # to be valid base since our ancestors had to create a new
998 # snapshot.
1004 # snapshot.
999 siblings = [r for r in siblings if floor < r]
1005 siblings = [r for r in siblings if floor < r]
1000 yield tuple(sorted(siblings))
1006 yield tuple(sorted(siblings))
1001 # then test the base from our parent's delta chain.
1007 # then test the base from our parent's delta chain.
1002 yield tuple(sorted(snaps))
1008 yield tuple(sorted(snaps))
1003 floor = min(snaps)
1009 floor = min(snaps)
1004 # No suitable base found in the parent chain, search if any full
1010 # No suitable base found in the parent chain, search if any full
1005 # snapshots emitted since parent's base would be a suitable base for an
1011 # snapshots emitted since parent's base would be a suitable base for an
1006 # intermediate snapshot.
1012 # intermediate snapshot.
1007 #
1013 #
1008 # It give a chance to reuse a delta chain unrelated to the current
1014 # It give a chance to reuse a delta chain unrelated to the current
1009 # revisions instead of starting our own. Without such re-use,
1015 # revisions instead of starting our own. Without such re-use,
1010 # topological branches would keep reopening new full chains. Creating
1016 # topological branches would keep reopening new full chains. Creating
1011 # more and more snapshot as the repository grow.
1017 # more and more snapshot as the repository grow.
1012 full = [r for r in snapshot_cache.snapshots[nullrev] if snapfloor <= r]
1018 full = [r for r in snapshot_cache.snapshots[nullrev] if snapfloor <= r]
1013 yield tuple(sorted(full))
1019 yield tuple(sorted(full))
1014
1020
1015 if not sparse:
1021 if not sparse:
1016 # other approach failed try against prev to hopefully save us a
1022 # other approach failed try against prev to hopefully save us a
1017 # fulltext.
1023 # fulltext.
1018 yield (prev,)
1024 yield (prev,)
1019
1025
1020
1026
1021 class SnapshotCache:
1027 class SnapshotCache:
1022 __slots__ = ('snapshots', '_start_rev', '_end_rev')
1028 __slots__ = ('snapshots', '_start_rev', '_end_rev')
1023
1029
1024 def __init__(self):
1030 def __init__(self):
1025 self.snapshots = collections.defaultdict(set)
1031 self.snapshots = collections.defaultdict(set)
1026 self._start_rev = None
1032 self._start_rev = None
1027 self._end_rev = None
1033 self._end_rev = None
1028
1034
1029 def update(self, revlog, start_rev=0):
1035 def update(self, revlog, start_rev=0):
1030 """find snapshots from start_rev to tip"""
1036 """find snapshots from start_rev to tip"""
1031 nb_revs = len(revlog)
1037 nb_revs = len(revlog)
1032 end_rev = nb_revs - 1
1038 end_rev = nb_revs - 1
1033 if start_rev > end_rev:
1039 if start_rev > end_rev:
1034 return # range is empty
1040 return # range is empty
1035
1041
1036 if self._start_rev is None:
1042 if self._start_rev is None:
1037 assert self._end_rev is None
1043 assert self._end_rev is None
1038 self._update(revlog, start_rev, end_rev)
1044 self._update(revlog, start_rev, end_rev)
1039 elif not (self._start_rev <= start_rev and end_rev <= self._end_rev):
1045 elif not (self._start_rev <= start_rev and end_rev <= self._end_rev):
1040 if start_rev < self._start_rev:
1046 if start_rev < self._start_rev:
1041 self._update(revlog, start_rev, self._start_rev - 1)
1047 self._update(revlog, start_rev, self._start_rev - 1)
1042 if self._end_rev < end_rev:
1048 if self._end_rev < end_rev:
1043 self._update(revlog, self._end_rev + 1, end_rev)
1049 self._update(revlog, self._end_rev + 1, end_rev)
1044
1050
1045 if self._start_rev is None:
1051 if self._start_rev is None:
1046 assert self._end_rev is None
1052 assert self._end_rev is None
1047 self._end_rev = end_rev
1053 self._end_rev = end_rev
1048 self._start_rev = start_rev
1054 self._start_rev = start_rev
1049 else:
1055 else:
1050 self._start_rev = min(self._start_rev, start_rev)
1056 self._start_rev = min(self._start_rev, start_rev)
1051 self._end_rev = max(self._end_rev, end_rev)
1057 self._end_rev = max(self._end_rev, end_rev)
1052 assert self._start_rev <= self._end_rev, (
1058 assert self._start_rev <= self._end_rev, (
1053 self._start_rev,
1059 self._start_rev,
1054 self._end_rev,
1060 self._end_rev,
1055 )
1061 )
1056
1062
1057 def _update(self, revlog, start_rev, end_rev):
1063 def _update(self, revlog, start_rev, end_rev):
1058 """internal method that actually do update content"""
1064 """internal method that actually do update content"""
1059 assert self._start_rev is None or (
1065 assert self._start_rev is None or (
1060 start_rev < self._start_rev or start_rev > self._end_rev
1066 start_rev < self._start_rev or start_rev > self._end_rev
1061 ), (self._start_rev, self._end_rev, start_rev, end_rev)
1067 ), (self._start_rev, self._end_rev, start_rev, end_rev)
1062 assert self._start_rev is None or (
1068 assert self._start_rev is None or (
1063 end_rev < self._start_rev or end_rev > self._end_rev
1069 end_rev < self._start_rev or end_rev > self._end_rev
1064 ), (self._start_rev, self._end_rev, start_rev, end_rev)
1070 ), (self._start_rev, self._end_rev, start_rev, end_rev)
1065 cache = self.snapshots
1071 cache = self.snapshots
1066 if util.safehasattr(revlog.index, b'findsnapshots'):
1072 if util.safehasattr(revlog.index, b'findsnapshots'):
1067 revlog.index.findsnapshots(cache, start_rev, end_rev)
1073 revlog.index.findsnapshots(cache, start_rev, end_rev)
1068 else:
1074 else:
1069 deltaparent = revlog.deltaparent
1075 deltaparent = revlog.deltaparent
1070 issnapshot = revlog.issnapshot
1076 issnapshot = revlog.issnapshot
1071 for rev in revlog.revs(start_rev, end_rev):
1077 for rev in revlog.revs(start_rev, end_rev):
1072 if issnapshot(rev):
1078 if issnapshot(rev):
1073 cache[deltaparent(rev)].add(rev)
1079 cache[deltaparent(rev)].add(rev)
1074
1080
1075
1081
1076 class deltacomputer:
1082 class deltacomputer:
1077 def __init__(
1083 def __init__(
1078 self,
1084 self,
1079 revlog,
1085 revlog,
1080 write_debug=None,
1086 write_debug=None,
1081 debug_search=False,
1087 debug_search=False,
1082 debug_info=None,
1088 debug_info=None,
1083 ):
1089 ):
1084 self.revlog = revlog
1090 self.revlog = revlog
1085 self._write_debug = write_debug
1091 self._write_debug = write_debug
1086 self._debug_search = debug_search
1092 self._debug_search = debug_search
1087 self._debug_info = debug_info
1093 self._debug_info = debug_info
1088 self._snapshot_cache = SnapshotCache()
1094 self._snapshot_cache = SnapshotCache()
1089
1095
1090 def buildtext(self, revinfo, fh):
1096 def buildtext(self, revinfo, fh):
1091 """Builds a fulltext version of a revision
1097 """Builds a fulltext version of a revision
1092
1098
1093 revinfo: revisioninfo instance that contains all needed info
1099 revinfo: revisioninfo instance that contains all needed info
1094 fh: file handle to either the .i or the .d revlog file,
1100 fh: file handle to either the .i or the .d revlog file,
1095 depending on whether it is inlined or not
1101 depending on whether it is inlined or not
1096 """
1102 """
1097 btext = revinfo.btext
1103 btext = revinfo.btext
1098 if btext[0] is not None:
1104 if btext[0] is not None:
1099 return btext[0]
1105 return btext[0]
1100
1106
1101 revlog = self.revlog
1107 revlog = self.revlog
1102 cachedelta = revinfo.cachedelta
1108 cachedelta = revinfo.cachedelta
1103 baserev = cachedelta[0]
1109 baserev = cachedelta[0]
1104 delta = cachedelta[1]
1110 delta = cachedelta[1]
1105
1111
1106 fulltext = btext[0] = _textfromdelta(
1112 fulltext = btext[0] = _textfromdelta(
1107 fh,
1113 fh,
1108 revlog,
1114 revlog,
1109 baserev,
1115 baserev,
1110 delta,
1116 delta,
1111 revinfo.p1,
1117 revinfo.p1,
1112 revinfo.p2,
1118 revinfo.p2,
1113 revinfo.flags,
1119 revinfo.flags,
1114 revinfo.node,
1120 revinfo.node,
1115 )
1121 )
1116 return fulltext
1122 return fulltext
1117
1123
1118 def _builddeltadiff(self, base, revinfo, fh):
1124 def _builddeltadiff(self, base, revinfo, fh):
1119 revlog = self.revlog
1125 revlog = self.revlog
1120 t = self.buildtext(revinfo, fh)
1126 t = self.buildtext(revinfo, fh)
1121 if revlog.iscensored(base):
1127 if revlog.iscensored(base):
1122 # deltas based on a censored revision must replace the
1128 # deltas based on a censored revision must replace the
1123 # full content in one patch, so delta works everywhere
1129 # full content in one patch, so delta works everywhere
1124 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
1130 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
1125 delta = header + t
1131 delta = header + t
1126 else:
1132 else:
1127 ptext = revlog.rawdata(base, _df=fh)
1133 ptext = revlog.rawdata(base, _df=fh)
1128 delta = mdiff.textdiff(ptext, t)
1134 delta = mdiff.textdiff(ptext, t)
1129
1135
1130 return delta
1136 return delta
1131
1137
1132 def _builddeltainfo(self, revinfo, base, fh):
1138 def _builddeltainfo(self, revinfo, base, fh):
1133 # can we use the cached delta?
1139 # can we use the cached delta?
1134 revlog = self.revlog
1140 revlog = self.revlog
1135 debug_search = self._write_debug is not None and self._debug_search
1141 debug_search = self._write_debug is not None and self._debug_search
1136 chainbase = revlog.chainbase(base)
1142 chainbase = revlog.chainbase(base)
1137 if revlog._generaldelta:
1143 if revlog._generaldelta:
1138 deltabase = base
1144 deltabase = base
1139 else:
1145 else:
1140 deltabase = chainbase
1146 deltabase = chainbase
1141 snapshotdepth = None
1147 snapshotdepth = None
1142 if revlog._sparserevlog and deltabase == nullrev:
1148 if revlog._sparserevlog and deltabase == nullrev:
1143 snapshotdepth = 0
1149 snapshotdepth = 0
1144 elif revlog._sparserevlog and revlog.issnapshot(deltabase):
1150 elif revlog._sparserevlog and revlog.issnapshot(deltabase):
1145 # A delta chain should always be one full snapshot,
1151 # A delta chain should always be one full snapshot,
1146 # zero or more semi-snapshots, and zero or more deltas
1152 # zero or more semi-snapshots, and zero or more deltas
1147 p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
1153 p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
1148 if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
1154 if deltabase not in (p1, p2) and revlog.issnapshot(deltabase):
1149 snapshotdepth = len(revlog._deltachain(deltabase)[0])
1155 snapshotdepth = len(revlog._deltachain(deltabase)[0])
1150 delta = None
1156 delta = None
1151 if revinfo.cachedelta:
1157 if revinfo.cachedelta:
1152 cachebase = revinfo.cachedelta[0]
1158 cachebase = revinfo.cachedelta[0]
1153 # check if the diff still apply
1159 # check if the diff still apply
1154 currentbase = cachebase
1160 currentbase = cachebase
1155 while (
1161 while (
1156 currentbase != nullrev
1162 currentbase != nullrev
1157 and currentbase != base
1163 and currentbase != base
1158 and self.revlog.length(currentbase) == 0
1164 and self.revlog.length(currentbase) == 0
1159 ):
1165 ):
1160 currentbase = self.revlog.deltaparent(currentbase)
1166 currentbase = self.revlog.deltaparent(currentbase)
1161 if self.revlog._lazydelta and currentbase == base:
1167 if self.revlog._lazydelta and currentbase == base:
1162 delta = revinfo.cachedelta[1]
1168 delta = revinfo.cachedelta[1]
1163 if delta is None:
1169 if delta is None:
1164 delta = self._builddeltadiff(base, revinfo, fh)
1170 delta = self._builddeltadiff(base, revinfo, fh)
1165 if debug_search:
1171 if debug_search:
1166 msg = b"DBG-DELTAS-SEARCH: uncompressed-delta-size=%d\n"
1172 msg = b"DBG-DELTAS-SEARCH: uncompressed-delta-size=%d\n"
1167 msg %= len(delta)
1173 msg %= len(delta)
1168 self._write_debug(msg)
1174 self._write_debug(msg)
1169 # snapshotdept need to be neither None nor 0 level snapshot
1175 # snapshotdept need to be neither None nor 0 level snapshot
1170 if revlog.upperboundcomp is not None and snapshotdepth:
1176 if revlog.upperboundcomp is not None and snapshotdepth:
1171 lowestrealisticdeltalen = len(delta) // revlog.upperboundcomp
1177 lowestrealisticdeltalen = len(delta) // revlog.upperboundcomp
1172 snapshotlimit = revinfo.textlen >> snapshotdepth
1178 snapshotlimit = revinfo.textlen >> snapshotdepth
1173 if debug_search:
1179 if debug_search:
1174 msg = b"DBG-DELTAS-SEARCH: projected-lower-size=%d\n"
1180 msg = b"DBG-DELTAS-SEARCH: projected-lower-size=%d\n"
1175 msg %= lowestrealisticdeltalen
1181 msg %= lowestrealisticdeltalen
1176 self._write_debug(msg)
1182 self._write_debug(msg)
1177 if snapshotlimit < lowestrealisticdeltalen:
1183 if snapshotlimit < lowestrealisticdeltalen:
1178 if debug_search:
1184 if debug_search:
1179 msg = b"DBG-DELTAS-SEARCH: DISCARDED (snapshot limit)\n"
1185 msg = b"DBG-DELTAS-SEARCH: DISCARDED (snapshot limit)\n"
1180 self._write_debug(msg)
1186 self._write_debug(msg)
1181 return None
1187 return None
1182 if revlog.length(base) < lowestrealisticdeltalen:
1188 if revlog.length(base) < lowestrealisticdeltalen:
1183 if debug_search:
1189 if debug_search:
1184 msg = b"DBG-DELTAS-SEARCH: DISCARDED (prev size)\n"
1190 msg = b"DBG-DELTAS-SEARCH: DISCARDED (prev size)\n"
1185 self._write_debug(msg)
1191 self._write_debug(msg)
1186 return None
1192 return None
1187 header, data = revlog.compress(delta)
1193 header, data = revlog.compress(delta)
1188 deltalen = len(header) + len(data)
1194 deltalen = len(header) + len(data)
1189 offset = revlog.end(len(revlog) - 1)
1195 offset = revlog.end(len(revlog) - 1)
1190 dist = deltalen + offset - revlog.start(chainbase)
1196 dist = deltalen + offset - revlog.start(chainbase)
1191 chainlen, compresseddeltalen = revlog._chaininfo(base)
1197 chainlen, compresseddeltalen = revlog._chaininfo(base)
1192 chainlen += 1
1198 chainlen += 1
1193 compresseddeltalen += deltalen
1199 compresseddeltalen += deltalen
1194
1200
1195 return _deltainfo(
1201 return _deltainfo(
1196 dist,
1202 dist,
1197 deltalen,
1203 deltalen,
1198 (header, data),
1204 (header, data),
1199 deltabase,
1205 deltabase,
1200 chainbase,
1206 chainbase,
1201 chainlen,
1207 chainlen,
1202 compresseddeltalen,
1208 compresseddeltalen,
1203 snapshotdepth,
1209 snapshotdepth,
1204 )
1210 )
1205
1211
1206 def _fullsnapshotinfo(self, fh, revinfo, curr):
1212 def _fullsnapshotinfo(self, fh, revinfo, curr):
1207 rawtext = self.buildtext(revinfo, fh)
1213 rawtext = self.buildtext(revinfo, fh)
1208 data = self.revlog.compress(rawtext)
1214 data = self.revlog.compress(rawtext)
1209 compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
1215 compresseddeltalen = deltalen = dist = len(data[1]) + len(data[0])
1210 deltabase = chainbase = curr
1216 deltabase = chainbase = curr
1211 snapshotdepth = 0
1217 snapshotdepth = 0
1212 chainlen = 1
1218 chainlen = 1
1213
1219
1214 return _deltainfo(
1220 return _deltainfo(
1215 dist,
1221 dist,
1216 deltalen,
1222 deltalen,
1217 data,
1223 data,
1218 deltabase,
1224 deltabase,
1219 chainbase,
1225 chainbase,
1220 chainlen,
1226 chainlen,
1221 compresseddeltalen,
1227 compresseddeltalen,
1222 snapshotdepth,
1228 snapshotdepth,
1223 )
1229 )
1224
1230
1225 def finddeltainfo(self, revinfo, fh, excluded_bases=None, target_rev=None):
1231 def finddeltainfo(self, revinfo, fh, excluded_bases=None, target_rev=None):
1226 """Find an acceptable delta against a candidate revision
1232 """Find an acceptable delta against a candidate revision
1227
1233
1228 revinfo: information about the revision (instance of _revisioninfo)
1234 revinfo: information about the revision (instance of _revisioninfo)
1229 fh: file handle to either the .i or the .d revlog file,
1235 fh: file handle to either the .i or the .d revlog file,
1230 depending on whether it is inlined or not
1236 depending on whether it is inlined or not
1231
1237
1232 Returns the first acceptable candidate revision, as ordered by
1238 Returns the first acceptable candidate revision, as ordered by
1233 _candidategroups
1239 _candidategroups
1234
1240
1235 If no suitable deltabase is found, we return delta info for a full
1241 If no suitable deltabase is found, we return delta info for a full
1236 snapshot.
1242 snapshot.
1237
1243
1238 `excluded_bases` is an optional set of revision that cannot be used as
1244 `excluded_bases` is an optional set of revision that cannot be used as
1239 a delta base. Use this to recompute delta suitable in censor or strip
1245 a delta base. Use this to recompute delta suitable in censor or strip
1240 context.
1246 context.
1241 """
1247 """
1242 if target_rev is None:
1248 if target_rev is None:
1243 target_rev = len(self.revlog)
1249 target_rev = len(self.revlog)
1244
1250
1245 if not revinfo.textlen:
1251 if not revinfo.textlen:
1246 return self._fullsnapshotinfo(fh, revinfo, target_rev)
1252 return self._fullsnapshotinfo(fh, revinfo, target_rev)
1247
1253
1248 if excluded_bases is None:
1254 if excluded_bases is None:
1249 excluded_bases = set()
1255 excluded_bases = set()
1250
1256
1251 # no delta for flag processor revision (see "candelta" for why)
1257 # no delta for flag processor revision (see "candelta" for why)
1252 # not calling candelta since only one revision needs test, also to
1258 # not calling candelta since only one revision needs test, also to
1253 # avoid overhead fetching flags again.
1259 # avoid overhead fetching flags again.
1254 if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
1260 if revinfo.flags & REVIDX_RAWTEXT_CHANGING_FLAGS:
1255 return self._fullsnapshotinfo(fh, revinfo, target_rev)
1261 return self._fullsnapshotinfo(fh, revinfo, target_rev)
1256
1262
1257 gather_debug = (
1263 gather_debug = (
1258 self._write_debug is not None or self._debug_info is not None
1264 self._write_debug is not None or self._debug_info is not None
1259 )
1265 )
1260 debug_search = self._write_debug is not None and self._debug_search
1266 debug_search = self._write_debug is not None and self._debug_search
1261
1267
1262 if gather_debug:
1268 if gather_debug:
1263 start = util.timer()
1269 start = util.timer()
1264
1270
1265 # count the number of different delta we tried (for debug purpose)
1271 # count the number of different delta we tried (for debug purpose)
1266 dbg_try_count = 0
1272 dbg_try_count = 0
1267 # count the number of "search round" we did. (for debug purpose)
1273 # count the number of "search round" we did. (for debug purpose)
1268 dbg_try_rounds = 0
1274 dbg_try_rounds = 0
1269 dbg_type = b'unknown'
1275 dbg_type = b'unknown'
1270
1276
1271 cachedelta = revinfo.cachedelta
1277 cachedelta = revinfo.cachedelta
1272 p1 = revinfo.p1
1278 p1 = revinfo.p1
1273 p2 = revinfo.p2
1279 p2 = revinfo.p2
1274 revlog = self.revlog
1280 revlog = self.revlog
1275
1281
1276 deltainfo = None
1282 deltainfo = None
1277 p1r, p2r = revlog.rev(p1), revlog.rev(p2)
1283 p1r, p2r = revlog.rev(p1), revlog.rev(p2)
1278
1284
1279 if gather_debug:
1285 if gather_debug:
1280 if p1r != nullrev:
1286 if p1r != nullrev:
1281 p1_chain_len = revlog._chaininfo(p1r)[0]
1287 p1_chain_len = revlog._chaininfo(p1r)[0]
1282 else:
1288 else:
1283 p1_chain_len = -1
1289 p1_chain_len = -1
1284 if p2r != nullrev:
1290 if p2r != nullrev:
1285 p2_chain_len = revlog._chaininfo(p2r)[0]
1291 p2_chain_len = revlog._chaininfo(p2r)[0]
1286 else:
1292 else:
1287 p2_chain_len = -1
1293 p2_chain_len = -1
1288 if debug_search:
1294 if debug_search:
1289 msg = b"DBG-DELTAS-SEARCH: SEARCH rev=%d\n"
1295 msg = b"DBG-DELTAS-SEARCH: SEARCH rev=%d\n"
1290 msg %= target_rev
1296 msg %= target_rev
1291 self._write_debug(msg)
1297 self._write_debug(msg)
1292
1298
1293 groups = _candidategroups(
1299 groups = _candidategroups(
1294 self.revlog,
1300 self.revlog,
1295 revinfo.textlen,
1301 revinfo.textlen,
1296 p1r,
1302 p1r,
1297 p2r,
1303 p2r,
1298 cachedelta,
1304 cachedelta,
1299 excluded_bases,
1305 excluded_bases,
1300 target_rev,
1306 target_rev,
1301 snapshot_cache=self._snapshot_cache,
1307 snapshot_cache=self._snapshot_cache,
1302 )
1308 )
1303 candidaterevs = next(groups)
1309 candidaterevs = next(groups)
1304 while candidaterevs is not None:
1310 while candidaterevs is not None:
1305 dbg_try_rounds += 1
1311 dbg_try_rounds += 1
1306 if debug_search:
1312 if debug_search:
1307 prev = None
1313 prev = None
1308 if deltainfo is not None:
1314 if deltainfo is not None:
1309 prev = deltainfo.base
1315 prev = deltainfo.base
1310
1316
1311 if (
1317 if (
1312 cachedelta is not None
1318 cachedelta is not None
1313 and len(candidaterevs) == 1
1319 and len(candidaterevs) == 1
1314 and cachedelta[0] in candidaterevs
1320 and cachedelta[0] in candidaterevs
1315 ):
1321 ):
1316 round_type = b"cached-delta"
1322 round_type = b"cached-delta"
1317 elif p1 in candidaterevs or p2 in candidaterevs:
1323 elif p1 in candidaterevs or p2 in candidaterevs:
1318 round_type = b"parents"
1324 round_type = b"parents"
1319 elif prev is not None and all(c < prev for c in candidaterevs):
1325 elif prev is not None and all(c < prev for c in candidaterevs):
1320 round_type = b"refine-down"
1326 round_type = b"refine-down"
1321 elif prev is not None and all(c > prev for c in candidaterevs):
1327 elif prev is not None and all(c > prev for c in candidaterevs):
1322 round_type = b"refine-up"
1328 round_type = b"refine-up"
1323 else:
1329 else:
1324 round_type = b"search-down"
1330 round_type = b"search-down"
1325 msg = b"DBG-DELTAS-SEARCH: ROUND #%d - %d candidates - %s\n"
1331 msg = b"DBG-DELTAS-SEARCH: ROUND #%d - %d candidates - %s\n"
1326 msg %= (dbg_try_rounds, len(candidaterevs), round_type)
1332 msg %= (dbg_try_rounds, len(candidaterevs), round_type)
1327 self._write_debug(msg)
1333 self._write_debug(msg)
1328 nominateddeltas = []
1334 nominateddeltas = []
1329 if deltainfo is not None:
1335 if deltainfo is not None:
1330 if debug_search:
1336 if debug_search:
1331 msg = (
1337 msg = (
1332 b"DBG-DELTAS-SEARCH: CONTENDER: rev=%d - length=%d\n"
1338 b"DBG-DELTAS-SEARCH: CONTENDER: rev=%d - length=%d\n"
1333 )
1339 )
1334 msg %= (deltainfo.base, deltainfo.deltalen)
1340 msg %= (deltainfo.base, deltainfo.deltalen)
1335 self._write_debug(msg)
1341 self._write_debug(msg)
1336 # if we already found a good delta,
1342 # if we already found a good delta,
1337 # challenge it against refined candidates
1343 # challenge it against refined candidates
1338 nominateddeltas.append(deltainfo)
1344 nominateddeltas.append(deltainfo)
1339 for candidaterev in candidaterevs:
1345 for candidaterev in candidaterevs:
1340 if debug_search:
1346 if debug_search:
1341 msg = b"DBG-DELTAS-SEARCH: CANDIDATE: rev=%d\n"
1347 msg = b"DBG-DELTAS-SEARCH: CANDIDATE: rev=%d\n"
1342 msg %= candidaterev
1348 msg %= candidaterev
1343 self._write_debug(msg)
1349 self._write_debug(msg)
1344 candidate_type = None
1350 candidate_type = None
1345 if candidaterev == p1:
1351 if candidaterev == p1:
1346 candidate_type = b"p1"
1352 candidate_type = b"p1"
1347 elif candidaterev == p2:
1353 elif candidaterev == p2:
1348 candidate_type = b"p2"
1354 candidate_type = b"p2"
1349 elif self.revlog.issnapshot(candidaterev):
1355 elif self.revlog.issnapshot(candidaterev):
1350 candidate_type = b"snapshot-%d"
1356 candidate_type = b"snapshot-%d"
1351 candidate_type %= self.revlog.snapshotdepth(
1357 candidate_type %= self.revlog.snapshotdepth(
1352 candidaterev
1358 candidaterev
1353 )
1359 )
1354
1360
1355 if candidate_type is not None:
1361 if candidate_type is not None:
1356 msg = b"DBG-DELTAS-SEARCH: type=%s\n"
1362 msg = b"DBG-DELTAS-SEARCH: type=%s\n"
1357 msg %= candidate_type
1363 msg %= candidate_type
1358 self._write_debug(msg)
1364 self._write_debug(msg)
1359 msg = b"DBG-DELTAS-SEARCH: size=%d\n"
1365 msg = b"DBG-DELTAS-SEARCH: size=%d\n"
1360 msg %= self.revlog.length(candidaterev)
1366 msg %= self.revlog.length(candidaterev)
1361 self._write_debug(msg)
1367 self._write_debug(msg)
1362 msg = b"DBG-DELTAS-SEARCH: base=%d\n"
1368 msg = b"DBG-DELTAS-SEARCH: base=%d\n"
1363 msg %= self.revlog.deltaparent(candidaterev)
1369 msg %= self.revlog.deltaparent(candidaterev)
1364 self._write_debug(msg)
1370 self._write_debug(msg)
1365
1371
1366 dbg_try_count += 1
1372 dbg_try_count += 1
1367
1373
1368 if debug_search:
1374 if debug_search:
1369 delta_start = util.timer()
1375 delta_start = util.timer()
1370 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
1376 candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
1371 if debug_search:
1377 if debug_search:
1372 delta_end = util.timer()
1378 delta_end = util.timer()
1373 msg = b"DBG-DELTAS-SEARCH: delta-search-time=%f\n"
1379 msg = b"DBG-DELTAS-SEARCH: delta-search-time=%f\n"
1374 msg %= delta_end - delta_start
1380 msg %= delta_end - delta_start
1375 self._write_debug(msg)
1381 self._write_debug(msg)
1376 if candidatedelta is not None:
1382 if candidatedelta is not None:
1377 if is_good_delta_info(self.revlog, candidatedelta, revinfo):
1383 if is_good_delta_info(self.revlog, candidatedelta, revinfo):
1378 if debug_search:
1384 if debug_search:
1379 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (GOOD)\n"
1385 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (GOOD)\n"
1380 msg %= candidatedelta.deltalen
1386 msg %= candidatedelta.deltalen
1381 self._write_debug(msg)
1387 self._write_debug(msg)
1382 nominateddeltas.append(candidatedelta)
1388 nominateddeltas.append(candidatedelta)
1383 elif debug_search:
1389 elif debug_search:
1384 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (BAD)\n"
1390 msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (BAD)\n"
1385 msg %= candidatedelta.deltalen
1391 msg %= candidatedelta.deltalen
1386 self._write_debug(msg)
1392 self._write_debug(msg)
1387 elif debug_search:
1393 elif debug_search:
1388 msg = b"DBG-DELTAS-SEARCH: NO-DELTA\n"
1394 msg = b"DBG-DELTAS-SEARCH: NO-DELTA\n"
1389 self._write_debug(msg)
1395 self._write_debug(msg)
1390 if nominateddeltas:
1396 if nominateddeltas:
1391 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
1397 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
1392 if deltainfo is not None:
1398 if deltainfo is not None:
1393 candidaterevs = groups.send(deltainfo.base)
1399 candidaterevs = groups.send(deltainfo.base)
1394 else:
1400 else:
1395 candidaterevs = next(groups)
1401 candidaterevs = next(groups)
1396
1402
1397 if deltainfo is None:
1403 if deltainfo is None:
1398 dbg_type = b"full"
1404 dbg_type = b"full"
1399 deltainfo = self._fullsnapshotinfo(fh, revinfo, target_rev)
1405 deltainfo = self._fullsnapshotinfo(fh, revinfo, target_rev)
1400 elif deltainfo.snapshotdepth: # pytype: disable=attribute-error
1406 elif deltainfo.snapshotdepth: # pytype: disable=attribute-error
1401 dbg_type = b"snapshot"
1407 dbg_type = b"snapshot"
1402 else:
1408 else:
1403 dbg_type = b"delta"
1409 dbg_type = b"delta"
1404
1410
1405 if gather_debug:
1411 if gather_debug:
1406 end = util.timer()
1412 end = util.timer()
1407 if dbg_type == b'full':
1413 if dbg_type == b'full':
1408 used_cached = (
1414 used_cached = (
1409 cachedelta is not None
1415 cachedelta is not None
1410 and dbg_try_rounds == 0
1416 and dbg_try_rounds == 0
1411 and dbg_try_count == 0
1417 and dbg_try_count == 0
1412 and cachedelta[0] == nullrev
1418 and cachedelta[0] == nullrev
1413 )
1419 )
1414 else:
1420 else:
1415 used_cached = (
1421 used_cached = (
1416 cachedelta is not None
1422 cachedelta is not None
1417 and dbg_try_rounds == 1
1423 and dbg_try_rounds == 1
1418 and dbg_try_count == 1
1424 and dbg_try_count == 1
1419 and deltainfo.base == cachedelta[0]
1425 and deltainfo.base == cachedelta[0]
1420 )
1426 )
1421 dbg = {
1427 dbg = {
1422 'duration': end - start,
1428 'duration': end - start,
1423 'revision': target_rev,
1429 'revision': target_rev,
1424 'delta-base': deltainfo.base, # pytype: disable=attribute-error
1430 'delta-base': deltainfo.base, # pytype: disable=attribute-error
1425 'search_round_count': dbg_try_rounds,
1431 'search_round_count': dbg_try_rounds,
1426 'using-cached-base': used_cached,
1432 'using-cached-base': used_cached,
1427 'delta_try_count': dbg_try_count,
1433 'delta_try_count': dbg_try_count,
1428 'type': dbg_type,
1434 'type': dbg_type,
1429 'p1-chain-len': p1_chain_len,
1435 'p1-chain-len': p1_chain_len,
1430 'p2-chain-len': p2_chain_len,
1436 'p2-chain-len': p2_chain_len,
1431 }
1437 }
1432 if (
1438 if (
1433 deltainfo.snapshotdepth # pytype: disable=attribute-error
1439 deltainfo.snapshotdepth # pytype: disable=attribute-error
1434 is not None
1440 is not None
1435 ):
1441 ):
1436 dbg[
1442 dbg[
1437 'snapshot-depth'
1443 'snapshot-depth'
1438 ] = deltainfo.snapshotdepth # pytype: disable=attribute-error
1444 ] = deltainfo.snapshotdepth # pytype: disable=attribute-error
1439 else:
1445 else:
1440 dbg['snapshot-depth'] = 0
1446 dbg['snapshot-depth'] = 0
1441 target_revlog = b"UNKNOWN"
1447 target_revlog = b"UNKNOWN"
1442 target_type = self.revlog.target[0]
1448 target_type = self.revlog.target[0]
1443 target_key = self.revlog.target[1]
1449 target_key = self.revlog.target[1]
1444 if target_type == KIND_CHANGELOG:
1450 if target_type == KIND_CHANGELOG:
1445 target_revlog = b'CHANGELOG:'
1451 target_revlog = b'CHANGELOG:'
1446 elif target_type == KIND_MANIFESTLOG:
1452 elif target_type == KIND_MANIFESTLOG:
1447 target_revlog = b'MANIFESTLOG:'
1453 target_revlog = b'MANIFESTLOG:'
1448 if target_key:
1454 if target_key:
1449 target_revlog += b'%s:' % target_key
1455 target_revlog += b'%s:' % target_key
1450 elif target_type == KIND_FILELOG:
1456 elif target_type == KIND_FILELOG:
1451 target_revlog = b'FILELOG:'
1457 target_revlog = b'FILELOG:'
1452 if target_key:
1458 if target_key:
1453 target_revlog += b'%s:' % target_key
1459 target_revlog += b'%s:' % target_key
1454 dbg['target-revlog'] = target_revlog
1460 dbg['target-revlog'] = target_revlog
1455
1461
1456 if self._debug_info is not None:
1462 if self._debug_info is not None:
1457 self._debug_info.append(dbg)
1463 self._debug_info.append(dbg)
1458
1464
1459 if self._write_debug is not None:
1465 if self._write_debug is not None:
1460 msg = (
1466 msg = (
1461 b"DBG-DELTAS:"
1467 b"DBG-DELTAS:"
1462 b" %-12s"
1468 b" %-12s"
1463 b" rev=%d:"
1469 b" rev=%d:"
1464 b" delta-base=%d"
1470 b" delta-base=%d"
1465 b" is-cached=%d"
1471 b" is-cached=%d"
1466 b" - search-rounds=%d"
1472 b" - search-rounds=%d"
1467 b" try-count=%d"
1473 b" try-count=%d"
1468 b" - delta-type=%-6s"
1474 b" - delta-type=%-6s"
1469 b" snap-depth=%d"
1475 b" snap-depth=%d"
1470 b" - p1-chain-length=%d"
1476 b" - p1-chain-length=%d"
1471 b" p2-chain-length=%d"
1477 b" p2-chain-length=%d"
1472 b" - duration=%f"
1478 b" - duration=%f"
1473 b"\n"
1479 b"\n"
1474 )
1480 )
1475 msg %= (
1481 msg %= (
1476 dbg["target-revlog"],
1482 dbg["target-revlog"],
1477 dbg["revision"],
1483 dbg["revision"],
1478 dbg["delta-base"],
1484 dbg["delta-base"],
1479 dbg["using-cached-base"],
1485 dbg["using-cached-base"],
1480 dbg["search_round_count"],
1486 dbg["search_round_count"],
1481 dbg["delta_try_count"],
1487 dbg["delta_try_count"],
1482 dbg["type"],
1488 dbg["type"],
1483 dbg["snapshot-depth"],
1489 dbg["snapshot-depth"],
1484 dbg["p1-chain-len"],
1490 dbg["p1-chain-len"],
1485 dbg["p2-chain-len"],
1491 dbg["p2-chain-len"],
1486 dbg["duration"],
1492 dbg["duration"],
1487 )
1493 )
1488 self._write_debug(msg)
1494 self._write_debug(msg)
1489 return deltainfo
1495 return deltainfo
1490
1496
1491
1497
def delta_compression(default_compression_header, deltainfo):
    """return (COMPRESSION_MODE, deltainfo)

    used by revlog v2+ format to dispatch between PLAIN and DEFAULT
    compression.
    """
    header, body = deltainfo.data
    mode = COMP_MODE_INLINE
    if not header:
        if not body:
            # no data to store at all... declare it uncompressed
            mode = COMP_MODE_PLAIN
        else:
            # no compression header: sniff the first byte of the payload
            first_byte = body[0:1]
            if first_byte == b'\0':
                mode = COMP_MODE_PLAIN
            elif first_byte == default_compression_header:
                mode = COMP_MODE_DEFAULT
    elif header == b'u':
        # the b'u' header marks uncompressed data; recording that through
        # the compression mode is more efficient than keeping the header
        mode = COMP_MODE_PLAIN
        deltainfo = drop_u_compression(deltainfo)
    return mode, deltainfo
General Comments 0
You need to be logged in to leave comments. Login now