Show More
@@ -1,16 +1,58 | |||
|
1 | 1 | # mercurial.revlogutils -- basic utilities for revlog |
|
2 | 2 | # |
|
3 | 3 | # Copyright 2019 Pierre-Yves David <pierre-yves.david@octobus.net> |
|
4 | 4 | # |
|
5 | 5 | # This software may be used and distributed according to the terms of the |
|
6 | 6 | # GNU General Public License version 2 or any later version. |
|
7 | 7 | |
|
8 | 8 | from __future__ import absolute_import |
|
9 | 9 | |
|
10 | 10 | from ..interfaces import repository |
|
11 | 11 | |
|
12 | # See mercurial.revlogutils.constants for doc | |
|
13 | COMP_MODE_INLINE = 2 | |
|
14 | ||
|
12 | 15 | |
|
13 | 16 | def offset_type(offset, type): |
|
14 | 17 | if (type & ~repository.REVISION_FLAGS_KNOWN) != 0: |
|
15 | 18 | raise ValueError(b'unknown revlog index flags: %d' % type) |
|
16 | 19 | return int(int(offset) << 16 | type) |
|
20 | ||
|
21 | ||
|
22 | def entry( | |
|
23 | data_offset, | |
|
24 | data_compressed_length, | |
|
25 | data_delta_base, | |
|
26 | link_rev, | |
|
27 | parent_rev_1, | |
|
28 | parent_rev_2, | |
|
29 | node_id, | |
|
30 | flags=0, | |
|
31 | data_uncompressed_length=-1, | |
|
32 | data_compression_mode=COMP_MODE_INLINE, | |
|
33 | sidedata_offset=0, | |
|
34 | sidedata_compressed_length=0, | |
|
35 | sidedata_compression_mode=COMP_MODE_INLINE, | |
|
36 | ): | |
|
37 | """Build one entry from symbolic name | |
|
38 | ||
|
39 | This is useful to abstract the actual detail of how we build the entry | |
|
40 | tuple for caller who don't care about it. | |
|
41 | ||
|
42 | This should always be called using keyword arguments. Some arguments have | |
|
43 | default value, this match the value used by index version that does not store such data. | |
|
44 | """ | |
|
45 | return ( | |
|
46 | offset_type(data_offset, flags), | |
|
47 | data_compressed_length, | |
|
48 | data_uncompressed_length, | |
|
49 | data_delta_base, | |
|
50 | link_rev, | |
|
51 | parent_rev_1, | |
|
52 | parent_rev_2, | |
|
53 | node_id, | |
|
54 | sidedata_offset, | |
|
55 | sidedata_compressed_length, | |
|
56 | data_compression_mode, | |
|
57 | sidedata_compression_mode, | |
|
58 | ) |
@@ -1,281 +1,282 | |||
|
1 | 1 | # revlogdeltas.py - constant used for revlog logic. |
|
2 | 2 | # |
|
3 | 3 | # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com> |
|
4 | 4 | # Copyright 2018 Octobus <contact@octobus.net> |
|
5 | 5 | # |
|
6 | 6 | # This software may be used and distributed according to the terms of the |
|
7 | 7 | # GNU General Public License version 2 or any later version. |
|
8 | 8 | """Helper class to compute deltas stored inside revlogs""" |
|
9 | 9 | |
|
10 | 10 | from __future__ import absolute_import |
|
11 | 11 | |
|
12 | 12 | import struct |
|
13 | 13 | |
|
14 | 14 | from ..interfaces import repository |
|
15 | from .. import revlogutils | |
|
15 | 16 | |
|
16 | 17 | ### Internal utily constants |
|
17 | 18 | |
|
18 | 19 | KIND_CHANGELOG = 1001 # over 256 to not be comparable with a bytes |
|
19 | 20 | KIND_MANIFESTLOG = 1002 |
|
20 | 21 | KIND_FILELOG = 1003 |
|
21 | 22 | KIND_OTHER = 1004 |
|
22 | 23 | |
|
23 | 24 | ALL_KINDS = { |
|
24 | 25 | KIND_CHANGELOG, |
|
25 | 26 | KIND_MANIFESTLOG, |
|
26 | 27 | KIND_FILELOG, |
|
27 | 28 | KIND_OTHER, |
|
28 | 29 | } |
|
29 | 30 | |
|
30 | 31 | ### Index entry key |
|
31 | 32 | # |
|
32 | 33 | # |
|
33 | 34 | # Internal details |
|
34 | 35 | # ---------------- |
|
35 | 36 | # |
|
36 | 37 | # A large part of the revlog logic deals with revisions' "index entries", tuple |
|
37 | 38 | # objects that contains the same "items" whatever the revlog version. |
|
38 | 39 | # Different versions will have different ways of storing these items (sometimes |
|
39 | 40 | # not having them at all), but the tuple will always be the same. New fields |
|
40 | 41 | # are usually added at the end to avoid breaking existing code that relies |
|
41 | 42 | # on the existing order. The field are defined as follows: |
|
42 | 43 | |
|
43 | 44 | # [0] offset: |
|
44 | 45 | # The byte index of the start of revision data chunk. |
|
45 | 46 | # That value is shifted up by 16 bits. use "offset = field >> 16" to |
|
46 | 47 | # retrieve it. |
|
47 | 48 | # |
|
48 | 49 | # flags: |
|
49 | 50 | # A flag field that carries special information or changes the behavior |
|
50 | 51 | # of the revision. (see `REVIDX_*` constants for details) |
|
51 | 52 | # The flag field only occupies the first 16 bits of this field, |
|
52 | 53 | # use "flags = field & 0xFFFF" to retrieve the value. |
|
53 | 54 | ENTRY_DATA_OFFSET = 0 |
|
54 | 55 | |
|
55 | 56 | # [1] compressed length: |
|
56 | 57 | # The size, in bytes, of the chunk on disk |
|
57 | 58 | ENTRY_DATA_COMPRESSED_LENGTH = 1 |
|
58 | 59 | |
|
59 | 60 | # [2] uncompressed length: |
|
60 | 61 | # The size, in bytes, of the full revision once reconstructed. |
|
61 | 62 | ENTRY_DATA_UNCOMPRESSED_LENGTH = 2 |
|
62 | 63 | |
|
63 | 64 | # [3] base rev: |
|
64 | 65 | # Either the base of the revision delta chain (without general |
|
65 | 66 | # delta), or the base of the delta (stored in the data chunk) |
|
66 | 67 | # with general delta. |
|
67 | 68 | ENTRY_DELTA_BASE = 3 |
|
68 | 69 | |
|
69 | 70 | # [4] link rev: |
|
70 | 71 | # Changelog revision number of the changeset introducing this |
|
71 | 72 | # revision. |
|
72 | 73 | ENTRY_LINK_REV = 4 |
|
73 | 74 | |
|
74 | 75 | # [5] parent 1 rev: |
|
75 | 76 | # Revision number of the first parent |
|
76 | 77 | ENTRY_PARENT_1 = 5 |
|
77 | 78 | |
|
78 | 79 | # [6] parent 2 rev: |
|
79 | 80 | # Revision number of the second parent |
|
80 | 81 | ENTRY_PARENT_2 = 6 |
|
81 | 82 | |
|
82 | 83 | # [7] node id: |
|
83 | 84 | # The node id of the current revision |
|
84 | 85 | ENTRY_NODE_ID = 7 |
|
85 | 86 | |
|
86 | 87 | # [8] sidedata offset: |
|
87 | 88 | # The byte index of the start of the revision's side-data chunk. |
|
88 | 89 | ENTRY_SIDEDATA_OFFSET = 8 |
|
89 | 90 | |
|
90 | 91 | # [9] sidedata chunk length: |
|
91 | 92 | # The size, in bytes, of the revision's side-data chunk. |
|
92 | 93 | ENTRY_SIDEDATA_COMPRESSED_LENGTH = 9 |
|
93 | 94 | |
|
94 | 95 | # [10] data compression mode: |
|
95 | 96 | # two bits that detail the way the data chunk is compressed on disk. |
|
96 | 97 | # (see "COMP_MODE_*" constants for details). For revlog version 0 and |
|
97 | 98 | # 1 this will always be COMP_MODE_INLINE. |
|
98 | 99 | ENTRY_DATA_COMPRESSION_MODE = 10 |
|
99 | 100 | |
|
100 | 101 | # [11] side-data compression mode: |
|
101 | 102 | # two bits that detail the way the sidedata chunk is compressed on disk. |
|
102 | 103 | # (see "COMP_MODE_*" constants for details) |
|
103 | 104 | ENTRY_SIDEDATA_COMPRESSION_MODE = 11 |
|
104 | 105 | |
|
105 | 106 | ### main revlog header |
|
106 | 107 | |
|
107 | 108 | # We cannot rely on Struct.format is inconsistent for python <=3.6 versus above |
|
108 | 109 | INDEX_HEADER_FMT = b">I" |
|
109 | 110 | INDEX_HEADER = struct.Struct(INDEX_HEADER_FMT) |
|
110 | 111 | |
|
111 | 112 | ## revlog version |
|
112 | 113 | REVLOGV0 = 0 |
|
113 | 114 | REVLOGV1 = 1 |
|
114 | 115 | # Dummy value until file format is finalized. |
|
115 | 116 | REVLOGV2 = 0xDEAD |
|
116 | 117 | # Dummy value until file format is finalized. |
|
117 | 118 | CHANGELOGV2 = 0xD34D |
|
118 | 119 | |
|
119 | 120 | ## global revlog header flags |
|
120 | 121 | # Shared across v1 and v2. |
|
121 | 122 | FLAG_INLINE_DATA = 1 << 16 |
|
122 | 123 | # Only used by v1, implied by v2. |
|
123 | 124 | FLAG_GENERALDELTA = 1 << 17 |
|
124 | 125 | REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA |
|
125 | 126 | REVLOG_DEFAULT_FORMAT = REVLOGV1 |
|
126 | 127 | REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS |
|
127 | 128 | REVLOGV0_FLAGS = 0 |
|
128 | 129 | REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA |
|
129 | 130 | REVLOGV2_FLAGS = FLAG_INLINE_DATA |
|
130 | 131 | CHANGELOGV2_FLAGS = 0 |
|
131 | 132 | |
|
132 | 133 | ### individual entry |
|
133 | 134 | |
|
134 | 135 | ## index v0: |
|
135 | 136 | # 4 bytes: offset |
|
136 | 137 | # 4 bytes: compressed length |
|
137 | 138 | # 4 bytes: base rev |
|
138 | 139 | # 4 bytes: link rev |
|
139 | 140 | # 20 bytes: parent 1 nodeid |
|
140 | 141 | # 20 bytes: parent 2 nodeid |
|
141 | 142 | # 20 bytes: nodeid |
|
142 | 143 | INDEX_ENTRY_V0 = struct.Struct(b">4l20s20s20s") |
|
143 | 144 | |
|
144 | 145 | ## index v1 |
|
145 | 146 | # 6 bytes: offset |
|
146 | 147 | # 2 bytes: flags |
|
147 | 148 | # 4 bytes: compressed length |
|
148 | 149 | # 4 bytes: uncompressed length |
|
149 | 150 | # 4 bytes: base rev |
|
150 | 151 | # 4 bytes: link rev |
|
151 | 152 | # 4 bytes: parent 1 rev |
|
152 | 153 | # 4 bytes: parent 2 rev |
|
153 | 154 | # 32 bytes: nodeid |
|
154 | 155 | INDEX_ENTRY_V1 = struct.Struct(b">Qiiiiii20s12x") |
|
155 | 156 | assert INDEX_ENTRY_V1.size == 32 * 2 |
|
156 | 157 | |
|
157 | 158 | # 6 bytes: offset |
|
158 | 159 | # 2 bytes: flags |
|
159 | 160 | # 4 bytes: compressed length |
|
160 | 161 | # 4 bytes: uncompressed length |
|
161 | 162 | # 4 bytes: base rev |
|
162 | 163 | # 4 bytes: link rev |
|
163 | 164 | # 4 bytes: parent 1 rev |
|
164 | 165 | # 4 bytes: parent 2 rev |
|
165 | 166 | # 32 bytes: nodeid |
|
166 | 167 | # 8 bytes: sidedata offset |
|
167 | 168 | # 4 bytes: sidedata compressed length |
|
168 | 169 | # 1 bytes: compression mode (2 lower bit are data_compression_mode) |
|
169 | 170 | # 19 bytes: Padding to align to 96 bytes (see RevlogV2Plan wiki page) |
|
170 | 171 | INDEX_ENTRY_V2 = struct.Struct(b">Qiiiiii20s12xQiB19x") |
|
171 | 172 | assert INDEX_ENTRY_V2.size == 32 * 3, INDEX_ENTRY_V2.size |
|
172 | 173 | |
|
173 | 174 | # 6 bytes: offset |
|
174 | 175 | # 2 bytes: flags |
|
175 | 176 | # 4 bytes: compressed length |
|
176 | 177 | # 4 bytes: uncompressed length |
|
177 | 178 | # 4 bytes: parent 1 rev |
|
178 | 179 | # 4 bytes: parent 2 rev |
|
179 | 180 | # 32 bytes: nodeid |
|
180 | 181 | # 8 bytes: sidedata offset |
|
181 | 182 | # 4 bytes: sidedata compressed length |
|
182 | 183 | # 1 bytes: compression mode (2 lower bit are data_compression_mode) |
|
183 | 184 | # 27 bytes: Padding to align to 96 bytes (see RevlogV2Plan wiki page) |
|
184 | 185 | INDEX_ENTRY_CL_V2 = struct.Struct(b">Qiiii20s12xQiB27x") |
|
185 | 186 | assert INDEX_ENTRY_CL_V2.size == 32 * 3, INDEX_ENTRY_V2.size |
|
186 | 187 | |
|
187 | 188 | # revlog index flags |
|
188 | 189 | |
|
189 | 190 | # For historical reasons, revlog's internal flags were exposed via the |
|
190 | 191 | # wire protocol and are even exposed in parts of the storage APIs. |
|
191 | 192 | |
|
192 | 193 | # revision has censor metadata, must be verified |
|
193 | 194 | REVIDX_ISCENSORED = repository.REVISION_FLAG_CENSORED |
|
194 | 195 | # revision hash does not match data (narrowhg) |
|
195 | 196 | REVIDX_ELLIPSIS = repository.REVISION_FLAG_ELLIPSIS |
|
196 | 197 | # revision data is stored externally |
|
197 | 198 | REVIDX_EXTSTORED = repository.REVISION_FLAG_EXTSTORED |
|
198 | 199 | # revision changes files in a way that could affect copy tracing. |
|
199 | 200 | REVIDX_HASCOPIESINFO = repository.REVISION_FLAG_HASCOPIESINFO |
|
200 | 201 | REVIDX_DEFAULT_FLAGS = 0 |
|
201 | 202 | # stable order in which flags need to be processed and their processors applied |
|
202 | 203 | REVIDX_FLAGS_ORDER = [ |
|
203 | 204 | REVIDX_ISCENSORED, |
|
204 | 205 | REVIDX_ELLIPSIS, |
|
205 | 206 | REVIDX_EXTSTORED, |
|
206 | 207 | REVIDX_HASCOPIESINFO, |
|
207 | 208 | ] |
|
208 | 209 | |
|
209 | 210 | # bitmark for flags that could cause rawdata content change |
|
210 | 211 | REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED |
|
211 | 212 | |
|
212 | 213 | ## chunk compression mode constants: |
|
213 | 214 | # These constants are used in revlog version >=2 to denote the compression used |
|
214 | 215 | # for a chunk. |
|
215 | 216 | |
|
216 | 217 | # Chunk use no compression, the data stored on disk can be directly use as |
|
217 | 218 | # chunk value. Without any header information prefixed. |
|
218 | 219 | COMP_MODE_PLAIN = 0 |
|
219 | 220 | |
|
220 | 221 | # Chunk use the "default compression" for the revlog (usually defined in the |
|
221 | 222 | # revlog docket). A header is still used. |
|
222 | 223 | # |
|
223 | 224 | # XXX: keeping a header is probably not useful and we should probably drop it. |
|
224 | 225 | # |
|
225 | 226 | # XXX: The value of allow mixed type of compression in the revlog is unclear |
|
226 | 227 | # and we should consider making PLAIN/DEFAULT the only available mode for |
|
227 | 228 | # revlog v2, disallowing INLINE mode. |
|
228 | 229 | COMP_MODE_DEFAULT = 1 |
|
229 | 230 | |
|
230 | 231 | # Chunk use a compression mode stored "inline" at the start of the chunk |
|
231 | 232 | # itself. This is the mode always used for revlog version "0" and "1" |
|
232 | COMP_MODE_INLINE = 2 | |
|
233 | COMP_MODE_INLINE = revlogutils.COMP_MODE_INLINE | |
|
233 | 234 | |
|
234 | 235 | SUPPORTED_FLAGS = { |
|
235 | 236 | REVLOGV0: REVLOGV0_FLAGS, |
|
236 | 237 | REVLOGV1: REVLOGV1_FLAGS, |
|
237 | 238 | REVLOGV2: REVLOGV2_FLAGS, |
|
238 | 239 | CHANGELOGV2: CHANGELOGV2_FLAGS, |
|
239 | 240 | } |
|
240 | 241 | |
|
241 | 242 | _no = lambda flags: False |
|
242 | 243 | _yes = lambda flags: True |
|
243 | 244 | |
|
244 | 245 | |
|
245 | 246 | def _from_flag(flag): |
|
246 | 247 | return lambda flags: bool(flags & flag) |
|
247 | 248 | |
|
248 | 249 | |
|
249 | 250 | FEATURES_BY_VERSION = { |
|
250 | 251 | REVLOGV0: { |
|
251 | 252 | b'inline': _no, |
|
252 | 253 | b'generaldelta': _no, |
|
253 | 254 | b'sidedata': False, |
|
254 | 255 | b'docket': False, |
|
255 | 256 | }, |
|
256 | 257 | REVLOGV1: { |
|
257 | 258 | b'inline': _from_flag(FLAG_INLINE_DATA), |
|
258 | 259 | b'generaldelta': _from_flag(FLAG_GENERALDELTA), |
|
259 | 260 | b'sidedata': False, |
|
260 | 261 | b'docket': False, |
|
261 | 262 | }, |
|
262 | 263 | REVLOGV2: { |
|
263 | 264 | # The point of inline-revlog is to reduce the number of files used in |
|
264 | 265 | # the store. Using a docket defeat this purpose. So we needs other |
|
265 | 266 | # means to reduce the number of files for revlogv2. |
|
266 | 267 | b'inline': _no, |
|
267 | 268 | b'generaldelta': _yes, |
|
268 | 269 | b'sidedata': True, |
|
269 | 270 | b'docket': True, |
|
270 | 271 | }, |
|
271 | 272 | CHANGELOGV2: { |
|
272 | 273 | b'inline': _no, |
|
273 | 274 | # General delta is useless for changelog since we don't do any delta |
|
274 | 275 | b'generaldelta': _no, |
|
275 | 276 | b'sidedata': True, |
|
276 | 277 | b'docket': True, |
|
277 | 278 | }, |
|
278 | 279 | } |
|
279 | 280 | |
|
280 | 281 | |
|
281 | 282 | SPARSE_REVLOG_MAX_CHAIN_LENGTH = 1000 |
General Comments 0
You need to be logged in to leave comments.
Login now