##// END OF EJS Templates
revlog: add a function to build index entry tuple...
marmoute -
r48187:a669404f default
parent child Browse files
Show More
@@ -1,16 +1,58 b''
1 1 # mercurial.revlogutils -- basic utilities for revlog
2 2 #
3 3 # Copyright 2019 Pierre-Yves David <pierre-yves.david@octobus.net>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 from ..interfaces import repository
11 11
12 # See mercurial.revlogutils.constants for doc
13 COMP_MODE_INLINE = 2
14
12 15
13 16 def offset_type(offset, type):
14 17 if (type & ~repository.REVISION_FLAGS_KNOWN) != 0:
15 18 raise ValueError(b'unknown revlog index flags: %d' % type)
16 19 return int(int(offset) << 16 | type)
20
21
22 def entry(
23 data_offset,
24 data_compressed_length,
25 data_delta_base,
26 link_rev,
27 parent_rev_1,
28 parent_rev_2,
29 node_id,
30 flags=0,
31 data_uncompressed_length=-1,
32 data_compression_mode=COMP_MODE_INLINE,
33 sidedata_offset=0,
34 sidedata_compressed_length=0,
35 sidedata_compression_mode=COMP_MODE_INLINE,
36 ):
37 """Build one entry from symbolic name
38
39 This is useful to abstract the actual detail of how we build the entry
40 tuple for caller who don't care about it.
41
42 This should always be called using keyword arguments. Some arguments have
43 default value, this match the value used by index version that does not store such data.
44 """
45 return (
46 offset_type(data_offset, flags),
47 data_compressed_length,
48 data_uncompressed_length,
49 data_delta_base,
50 link_rev,
51 parent_rev_1,
52 parent_rev_2,
53 node_id,
54 sidedata_offset,
55 sidedata_compressed_length,
56 data_compression_mode,
57 sidedata_compression_mode,
58 )
@@ -1,281 +1,282 b''
1 1 # revlogdeltas.py - constant used for revlog logic.
2 2 #
3 3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 4 # Copyright 2018 Octobus <contact@octobus.net>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8 """Helper class to compute deltas stored inside revlogs"""
9 9
10 10 from __future__ import absolute_import
11 11
12 12 import struct
13 13
14 14 from ..interfaces import repository
15 from .. import revlogutils
15 16
16 17 ### Internal utily constants
17 18
18 19 KIND_CHANGELOG = 1001 # over 256 to not be comparable with a bytes
19 20 KIND_MANIFESTLOG = 1002
20 21 KIND_FILELOG = 1003
21 22 KIND_OTHER = 1004
22 23
23 24 ALL_KINDS = {
24 25 KIND_CHANGELOG,
25 26 KIND_MANIFESTLOG,
26 27 KIND_FILELOG,
27 28 KIND_OTHER,
28 29 }
29 30
30 31 ### Index entry key
31 32 #
32 33 #
33 34 # Internal details
34 35 # ----------------
35 36 #
36 37 # A large part of the revlog logic deals with revisions' "index entries", tuple
37 38 # objects that contains the same "items" whatever the revlog version.
38 39 # Different versions will have different ways of storing these items (sometimes
39 40 # not having them at all), but the tuple will always be the same. New fields
40 41 # are usually added at the end to avoid breaking existing code that relies
41 42 # on the existing order. The field are defined as follows:
42 43
43 44 # [0] offset:
44 45 # The byte index of the start of revision data chunk.
45 46 # That value is shifted up by 16 bits. use "offset = field >> 16" to
46 47 # retrieve it.
47 48 #
48 49 # flags:
49 50 # A flag field that carries special information or changes the behavior
50 51 # of the revision. (see `REVIDX_*` constants for details)
51 52 # The flag field only occupies the first 16 bits of this field,
52 53 # use "flags = field & 0xFFFF" to retrieve the value.
53 54 ENTRY_DATA_OFFSET = 0
54 55
55 56 # [1] compressed length:
56 57 # The size, in bytes, of the chunk on disk
57 58 ENTRY_DATA_COMPRESSED_LENGTH = 1
58 59
59 60 # [2] uncompressed length:
60 61 # The size, in bytes, of the full revision once reconstructed.
61 62 ENTRY_DATA_UNCOMPRESSED_LENGTH = 2
62 63
63 64 # [3] base rev:
64 65 # Either the base of the revision delta chain (without general
65 66 # delta), or the base of the delta (stored in the data chunk)
66 67 # with general delta.
67 68 ENTRY_DELTA_BASE = 3
68 69
69 70 # [4] link rev:
70 71 # Changelog revision number of the changeset introducing this
71 72 # revision.
72 73 ENTRY_LINK_REV = 4
73 74
74 75 # [5] parent 1 rev:
75 76 # Revision number of the first parent
76 77 ENTRY_PARENT_1 = 5
77 78
78 79 # [6] parent 2 rev:
79 80 # Revision number of the second parent
80 81 ENTRY_PARENT_2 = 6
81 82
82 83 # [7] node id:
83 84 # The node id of the current revision
84 85 ENTRY_NODE_ID = 7
85 86
86 87 # [8] sidedata offset:
87 88 # The byte index of the start of the revision's side-data chunk.
88 89 ENTRY_SIDEDATA_OFFSET = 8
89 90
90 91 # [9] sidedata chunk length:
91 92 # The size, in bytes, of the revision's side-data chunk.
92 93 ENTRY_SIDEDATA_COMPRESSED_LENGTH = 9
93 94
94 95 # [10] data compression mode:
95 96 # two bits that detail the way the data chunk is compressed on disk.
96 97 # (see "COMP_MODE_*" constants for details). For revlog version 0 and
97 98 # 1 this will always be COMP_MODE_INLINE.
98 99 ENTRY_DATA_COMPRESSION_MODE = 10
99 100
100 101 # [11] side-data compression mode:
101 102 # two bits that detail the way the sidedata chunk is compressed on disk.
102 103 # (see "COMP_MODE_*" constants for details)
103 104 ENTRY_SIDEDATA_COMPRESSION_MODE = 11
104 105
105 106 ### main revlog header
106 107
107 108 # We cannot rely on Struct.format is inconsistent for python <=3.6 versus above
108 109 INDEX_HEADER_FMT = b">I"
109 110 INDEX_HEADER = struct.Struct(INDEX_HEADER_FMT)
110 111
111 112 ## revlog version
112 113 REVLOGV0 = 0
113 114 REVLOGV1 = 1
114 115 # Dummy value until file format is finalized.
115 116 REVLOGV2 = 0xDEAD
116 117 # Dummy value until file format is finalized.
117 118 CHANGELOGV2 = 0xD34D
118 119
119 120 ## global revlog header flags
120 121 # Shared across v1 and v2.
121 122 FLAG_INLINE_DATA = 1 << 16
122 123 # Only used by v1, implied by v2.
123 124 FLAG_GENERALDELTA = 1 << 17
124 125 REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
125 126 REVLOG_DEFAULT_FORMAT = REVLOGV1
126 127 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
127 128 REVLOGV0_FLAGS = 0
128 129 REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
129 130 REVLOGV2_FLAGS = FLAG_INLINE_DATA
130 131 CHANGELOGV2_FLAGS = 0
131 132
132 133 ### individual entry
133 134
134 135 ## index v0:
135 136 # 4 bytes: offset
136 137 # 4 bytes: compressed length
137 138 # 4 bytes: base rev
138 139 # 4 bytes: link rev
139 140 # 20 bytes: parent 1 nodeid
140 141 # 20 bytes: parent 2 nodeid
141 142 # 20 bytes: nodeid
142 143 INDEX_ENTRY_V0 = struct.Struct(b">4l20s20s20s")
143 144
144 145 ## index v1
145 146 # 6 bytes: offset
146 147 # 2 bytes: flags
147 148 # 4 bytes: compressed length
148 149 # 4 bytes: uncompressed length
149 150 # 4 bytes: base rev
150 151 # 4 bytes: link rev
151 152 # 4 bytes: parent 1 rev
152 153 # 4 bytes: parent 2 rev
153 154 # 32 bytes: nodeid
154 155 INDEX_ENTRY_V1 = struct.Struct(b">Qiiiiii20s12x")
155 156 assert INDEX_ENTRY_V1.size == 32 * 2
156 157
157 158 # 6 bytes: offset
158 159 # 2 bytes: flags
159 160 # 4 bytes: compressed length
160 161 # 4 bytes: uncompressed length
161 162 # 4 bytes: base rev
162 163 # 4 bytes: link rev
163 164 # 4 bytes: parent 1 rev
164 165 # 4 bytes: parent 2 rev
165 166 # 32 bytes: nodeid
166 167 # 8 bytes: sidedata offset
167 168 # 4 bytes: sidedata compressed length
168 169 # 1 bytes: compression mode (2 lower bit are data_compression_mode)
169 170 # 19 bytes: Padding to align to 96 bytes (see RevlogV2Plan wiki page)
170 171 INDEX_ENTRY_V2 = struct.Struct(b">Qiiiiii20s12xQiB19x")
171 172 assert INDEX_ENTRY_V2.size == 32 * 3, INDEX_ENTRY_V2.size
172 173
173 174 # 6 bytes: offset
174 175 # 2 bytes: flags
175 176 # 4 bytes: compressed length
176 177 # 4 bytes: uncompressed length
177 178 # 4 bytes: parent 1 rev
178 179 # 4 bytes: parent 2 rev
179 180 # 32 bytes: nodeid
180 181 # 8 bytes: sidedata offset
181 182 # 4 bytes: sidedata compressed length
182 183 # 1 bytes: compression mode (2 lower bit are data_compression_mode)
183 184 # 27 bytes: Padding to align to 96 bytes (see RevlogV2Plan wiki page)
184 185 INDEX_ENTRY_CL_V2 = struct.Struct(b">Qiiii20s12xQiB27x")
185 186 assert INDEX_ENTRY_CL_V2.size == 32 * 3, INDEX_ENTRY_V2.size
186 187
187 188 # revlog index flags
188 189
189 190 # For historical reasons, revlog's internal flags were exposed via the
190 191 # wire protocol and are even exposed in parts of the storage APIs.
191 192
192 193 # revision has censor metadata, must be verified
193 194 REVIDX_ISCENSORED = repository.REVISION_FLAG_CENSORED
194 195 # revision hash does not match data (narrowhg)
195 196 REVIDX_ELLIPSIS = repository.REVISION_FLAG_ELLIPSIS
196 197 # revision data is stored externally
197 198 REVIDX_EXTSTORED = repository.REVISION_FLAG_EXTSTORED
198 199 # revision changes files in a way that could affect copy tracing.
199 200 REVIDX_HASCOPIESINFO = repository.REVISION_FLAG_HASCOPIESINFO
200 201 REVIDX_DEFAULT_FLAGS = 0
201 202 # stable order in which flags need to be processed and their processors applied
202 203 REVIDX_FLAGS_ORDER = [
203 204 REVIDX_ISCENSORED,
204 205 REVIDX_ELLIPSIS,
205 206 REVIDX_EXTSTORED,
206 207 REVIDX_HASCOPIESINFO,
207 208 ]
208 209
209 210 # bitmark for flags that could cause rawdata content change
210 211 REVIDX_RAWTEXT_CHANGING_FLAGS = REVIDX_ISCENSORED | REVIDX_EXTSTORED
211 212
212 213 ## chunk compression mode constants:
213 214 # These constants are used in revlog version >=2 to denote the compression used
214 215 # for a chunk.
215 216
216 217 # Chunk use no compression, the data stored on disk can be directly use as
217 218 # chunk value. Without any header information prefixed.
218 219 COMP_MODE_PLAIN = 0
219 220
220 221 # Chunk use the "default compression" for the revlog (usually defined in the
221 222 # revlog docket). A header is still used.
222 223 #
223 224 # XXX: keeping a header is probably not useful and we should probably drop it.
224 225 #
225 226 # XXX: The value of allow mixed type of compression in the revlog is unclear
226 227 # and we should consider making PLAIN/DEFAULT the only available mode for
227 228 # revlog v2, disallowing INLINE mode.
228 229 COMP_MODE_DEFAULT = 1
229 230
230 231 # Chunk use a compression mode stored "inline" at the start of the chunk
231 232 # itself. This is the mode always used for revlog version "0" and "1"
232 COMP_MODE_INLINE = 2
233 COMP_MODE_INLINE = revlogutils.COMP_MODE_INLINE
233 234
234 235 SUPPORTED_FLAGS = {
235 236 REVLOGV0: REVLOGV0_FLAGS,
236 237 REVLOGV1: REVLOGV1_FLAGS,
237 238 REVLOGV2: REVLOGV2_FLAGS,
238 239 CHANGELOGV2: CHANGELOGV2_FLAGS,
239 240 }
240 241
241 242 _no = lambda flags: False
242 243 _yes = lambda flags: True
243 244
244 245
245 246 def _from_flag(flag):
246 247 return lambda flags: bool(flags & flag)
247 248
248 249
249 250 FEATURES_BY_VERSION = {
250 251 REVLOGV0: {
251 252 b'inline': _no,
252 253 b'generaldelta': _no,
253 254 b'sidedata': False,
254 255 b'docket': False,
255 256 },
256 257 REVLOGV1: {
257 258 b'inline': _from_flag(FLAG_INLINE_DATA),
258 259 b'generaldelta': _from_flag(FLAG_GENERALDELTA),
259 260 b'sidedata': False,
260 261 b'docket': False,
261 262 },
262 263 REVLOGV2: {
263 264 # The point of inline-revlog is to reduce the number of files used in
264 265 # the store. Using a docket defeat this purpose. So we needs other
265 266 # means to reduce the number of files for revlogv2.
266 267 b'inline': _no,
267 268 b'generaldelta': _yes,
268 269 b'sidedata': True,
269 270 b'docket': True,
270 271 },
271 272 CHANGELOGV2: {
272 273 b'inline': _no,
273 274 # General delta is useless for changelog since we don't do any delta
274 275 b'generaldelta': _no,
275 276 b'sidedata': True,
276 277 b'docket': True,
277 278 },
278 279 }
279 280
280 281
281 282 SPARSE_REVLOG_MAX_CHAIN_LENGTH = 1000
General Comments 0
You need to be logged in to leave comments. Login now