rust-revlog: teach the revlog opening code to read the repo options...
Raphaël Gomès
r52084:13f58ce7 default
@@ -1,4056 +1,4059 @@
# revlog.py - storage back-end for mercurial
# coding: utf8
#
# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""


import binascii
import collections
import contextlib
+import functools
import io
import os
import struct
import weakref
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullrev,
    sha1nodeconstants,
    short,
    wdirrev,
)
from .i18n import _
from .revlogutils.constants import (
    ALL_KINDS,
    CHANGELOGV2,
    COMP_MODE_DEFAULT,
    COMP_MODE_INLINE,
    COMP_MODE_PLAIN,
    DELTA_BASE_REUSE_NO,
    DELTA_BASE_REUSE_TRY,
    ENTRY_RANK,
    FEATURES_BY_VERSION,
    FLAG_GENERALDELTA,
    FLAG_INLINE_DATA,
    INDEX_HEADER,
    KIND_CHANGELOG,
    KIND_FILELOG,
    RANK_UNKNOWN,
    REVLOGV0,
    REVLOGV1,
    REVLOGV1_FLAGS,
    REVLOGV2,
    REVLOGV2_FLAGS,
    REVLOG_DEFAULT_FLAGS,
    REVLOG_DEFAULT_FORMAT,
    REVLOG_DEFAULT_VERSION,
    SUPPORTED_FLAGS,
)
from .revlogutils.flagutil import (
    REVIDX_DEFAULT_FLAGS,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_FLAGS_ORDER,
    REVIDX_HASCOPIESINFO,
    REVIDX_ISCENSORED,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
)
from .thirdparty import attr
from . import (
    ancestor,
    dagop,
    error,
    mdiff,
    policy,
    pycompat,
    revlogutils,
    templatefilters,
    util,
)
from .interfaces import (
    repository,
    util as interfaceutil,
)
from .revlogutils import (
    deltas as deltautil,
    docket as docketutil,
    flagutil,
    nodemap as nodemaputil,
    randomaccessfile,
    revlogv0,
    rewrite,
    sidedata as sidedatautil,
)
from .utils import (
    storageutil,
    stringutil,
)

# blanked usage of all the names to prevent pyflakes warnings
# We need these names available in the module for extensions.

REVLOGV0
REVLOGV1
REVLOGV2
CHANGELOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_HASCOPIESINFO
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_RAWTEXT_CHANGING_FLAGS

parsers = policy.importmod('parsers')
rustancestor = policy.importrust('ancestor')
rustdagop = policy.importrust('dagop')
rustrevlog = policy.importrust('revlog')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of inline data embedded into a revlog
_maxinline = 131072


# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    return text, False


def ellipsiswriteprocessor(rl, text):
    return text, False


def ellipsisrawprocessor(rl, text):
    return False


ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)


def _verify_revision(rl, skipflags, state, node):
    """Verify the integrity of the given revlog ``node`` while providing a hook
    point for extensions to influence the operation."""
    if skipflags:
        state[b'skipread'].add(node)
    else:
        # Side-effect: read content and verify hash.
        rl.revision(node)


# True if a fast implementation for persistent-nodemap is available
#
# We also consider we have a "fast" implementation in "pure" python because
# people using pure don't really have performance consideration (and a
# wheelbarrow of other slowness source)
HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or hasattr(
    parsers, 'BaseIndexObject'
)


@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta:
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    sidedata = attr.ib()
    protocol_flags = attr.ib()
    linknode = attr.ib(default=None)


@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem:
    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)


def parse_index_v1(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline)
    return index, cache


def parse_index_v2(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline, format=REVLOGV2)
    return index, cache


def parse_index_cl_v2(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline, format=CHANGELOGV2)
    return index, cache


if hasattr(parsers, 'parse_index_devel_nodemap'):

    def parse_index_v1_nodemap(data, inline):
        index, cache = parsers.parse_index_devel_nodemap(data, inline)
        return index, cache


else:
    parse_index_v1_nodemap = None

-def parse_index_v1_mixed(data, inline):
+def parse_index_v1_mixed(data, inline, default_header):
    index, cache = parse_index_v1(data, inline)
-    return rustrevlog.MixedIndex(index, data), cache
+    return rustrevlog.MixedIndex(index, data, default_header), cache
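
# --- Editor's illustration (not part of revlog.py): the change above
# threads a `default_header` argument through to the Rust index. A hedged
# sketch of a call site, with assumed values:
#
#     header = REVLOG_DEFAULT_VERSION  # assumption, not from this diff
#     index, cache = parse_index_v1_mixed(index_data, False, header)
#
# presumably so the Rust side has a revlog header to fall back on when it
# cannot derive one from the (possibly empty) index data itself.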


# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7FFFFFFF

FILE_TOO_SHORT_MSG = _(
    b'cannot read from revlog %s;'
    b' expected %d bytes from offset %d, data size is %d'
)

hexdigits = b'0123456789abcdefABCDEF'


class _Config:
    def copy(self):
        return self.__class__(**self.__dict__)


@attr.s()
class FeatureConfig(_Config):
    """Hold configuration values about the available revlog features"""

    # the default compression engine
    compression_engine = attr.ib(default=b'zlib')
    # compression engines options
    compression_engine_options = attr.ib(default=attr.Factory(dict))

    # can we use censor on this revlog
    censorable = attr.ib(default=False)
    # does this revlog use the "side data" feature
    has_side_data = attr.ib(default=False)
    # might remove rank configuration once the computation has no impact
    compute_rank = attr.ib(default=False)
    # parent order is supposed to be semantically irrelevant, so we
    # normally resort parents to ensure that the first parent is non-null,
    # if there is a non-null parent at all.
    # filelog abuses the parent order as flag to mark some instances of
    # meta-encoded files, so allow it to disable this behavior.
    canonical_parent_order = attr.ib(default=False)
    # can ellipsis commit be used
    enable_ellipsis = attr.ib(default=False)

    def copy(self):
        new = super().copy()
        new.compression_engine_options = self.compression_engine_options.copy()
        return new

@attr.s()
class DataConfig(_Config):
    """Hold configuration values about how revlog data are read"""

    # should we try to open the "pending" version of the revlog
    try_pending = attr.ib(default=False)
    # should we try to open the "split" version of the revlog
    try_split = attr.ib(default=False)
    # When True, indexfile should be opened with checkambig=True at writing,
    # to avoid file stat ambiguity.
    check_ambig = attr.ib(default=False)

    # If true, use mmap instead of reading to deal with large indexes
    mmap_large_index = attr.ib(default=False)
    # how much data counts as large
    mmap_index_threshold = attr.ib(default=None)
    # How much data to read and cache into the raw revlog data cache.
    chunk_cache_size = attr.ib(default=65536)

    # The size of the uncompressed cache compared to the largest revision seen.
    uncompressed_cache_factor = attr.ib(default=None)

    # The number of chunks cached
    uncompressed_cache_count = attr.ib(default=None)

    # Allow sparse reading of the revlog data
    with_sparse_read = attr.ib(default=False)
    # minimal density of a sparse read chunk
    sr_density_threshold = attr.ib(default=0.50)
    # minimal size of the data we skip when performing sparse reads
    sr_min_gap_size = attr.ib(default=262144)

    # are deltas encoded against arbitrary bases
    generaldelta = attr.ib(default=False)


@attr.s()
class DeltaConfig(_Config):
    """Hold configuration values about how new deltas are computed

    Some attributes are duplicated from DataConfig to help keep each object
    self-contained.
    """

    # can deltas be encoded against arbitrary bases
    general_delta = attr.ib(default=False)
    # Allow sparse writing of the revlog data
    sparse_revlog = attr.ib(default=False)
    # maximum length of a delta chain
    max_chain_len = attr.ib(default=None)
    # Maximum distance between delta chain base start and end
    max_deltachain_span = attr.ib(default=-1)
    # If `upper_bound_comp` is not None, this is the expected maximal gain from
    # compression for the data content.
    upper_bound_comp = attr.ib(default=None)
    # Should we try a delta against both parents
    delta_both_parents = attr.ib(default=True)
    # Test delta base candidate groups by chunks of this maximal size.
    candidate_group_chunk_size = attr.ib(default=0)
    # Should we display debug information about delta computation
    debug_delta = attr.ib(default=False)
    # trust incoming deltas by default
    lazy_delta = attr.ib(default=True)
    # trust the base of incoming deltas by default
    lazy_delta_base = attr.ib(default=False)


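# --- Editor's illustration (not part of revlog.py): a hedged sketch of
# assembling the three config objects by hand; the real opening code fills
# them from the repository configuration:
#
#     feature_config = FeatureConfig(censorable=True)
#     data_config = DataConfig(chunk_cache_size=65536, generaldelta=True)
#     delta_config = DeltaConfig(general_delta=True, sparse_revlog=True)
#
# All three inherit _Config.copy(), and FeatureConfig.copy() additionally
# copies the compression options dict so per-revlog tweaks stay local.

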
class _InnerRevlog:
    """An inner layer of the revlog object

    This layer exists so that some operations can be delegated to Rust; its
    boundaries are arbitrary and based on what we can delegate to Rust.
    """

    def __init__(
        self,
        opener,
        index,
        index_file,
        data_file,
        sidedata_file,
        inline,
        data_config,
        delta_config,
        feature_config,
        chunk_cache,
        default_compression_header,
    ):
        self.opener = opener
        self.index = index

        self.index_file = index_file
        self.data_file = data_file
        self.sidedata_file = sidedata_file
        self.inline = inline
        self.data_config = data_config
        self.delta_config = delta_config
        self.feature_config = feature_config

        # used during diverted write.
        self._orig_index_file = None

        self._default_compression_header = default_compression_header

        # index

        # 3-tuple of file handles being used for active writing.
        self._writinghandles = None

        self._segmentfile = randomaccessfile.randomaccessfile(
            self.opener,
            (self.index_file if self.inline else self.data_file),
            self.data_config.chunk_cache_size,
            chunk_cache,
        )
        self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
            self.opener,
            self.sidedata_file,
            self.data_config.chunk_cache_size,
        )

        # revlog header -> revlog compressor
        self._decompressors = {}
        # 3-tuple of (node, rev, text) for a raw revision.
        self._revisioncache = None

        # cache some uncompressed chunks
        # rev → uncompressed_chunk
        #
        # the max cost is dynamically updated to be proportional to the
        # size of revisions we actually encounter.
        self._uncompressed_chunk_cache = None
        if self.data_config.uncompressed_cache_factor is not None:
            self._uncompressed_chunk_cache = util.lrucachedict(
                self.data_config.uncompressed_cache_count,
                maxcost=65536,  # some arbitrary initial value
            )

        self._delay_buffer = None

    def __len__(self):
        return len(self.index)

    def clear_cache(self):
        assert not self.is_delaying
        self._revisioncache = None
        if self._uncompressed_chunk_cache is not None:
            self._uncompressed_chunk_cache.clear()
        self._segmentfile.clear_cache()
        self._segmentfile_sidedata.clear_cache()

    @property
    def canonical_index_file(self):
        if self._orig_index_file is not None:
            return self._orig_index_file
        return self.index_file

    @property
    def is_delaying(self):
        """is the revlog currently delaying the visibility of written data?

        The delaying mechanism can be either in-memory or written on disk in a
        side-file."""
        return (self._delay_buffer is not None) or (
            self._orig_index_file is not None
        )

    # Derived from index values.

    def start(self, rev):
        """the offset of the data chunk for this revision"""
        return int(self.index[rev][0] >> 16)

    def length(self, rev):
        """the length of the data chunk for this revision"""
        return self.index[rev][1]

    def end(self, rev):
        """the end of the data chunk for this revision"""
        return self.start(rev) + self.length(rev)

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self.delta_config.general_delta:
            return base
        else:
            return rev - 1

    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        if not self.delta_config.sparse_revlog:
            return self.deltaparent(rev) == nullrev
        elif hasattr(self.index, 'issnapshot'):
            # directly assign the method to cache the testing and access
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        while self.length(p1) == 0:
            b = self.deltaparent(p1)
            if b == p1:
                break
            p1 = b
        p2 = entry[6]
        while self.length(p2) == 0:
            b = self.deltaparent(p2)
            if b == p2:
                break
            p2 = b
        if base == p1 or base == p2:
            return False
        return self.issnapshot(base)

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        generaldelta = self.delta_config.general_delta
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped

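    # --- Editor's illustration (not part of revlog.py): a hedged usage
    # sketch. The chain lists the revisions whose chunks rebuild a text:
    #
    #     chain, stopped = inner._deltachain(rev, stoprev=cached_rev)
    #     chunks = inner._chunks(chain)
    #     # if stopped is False, chunks[0] is a full snapshot text and the
    #     # rest are deltas; if True, every chunk is a delta to apply on
    #     # top of the already-known text of `cached_rev`.
    #
    # raw_text() below is the real consumer, using the one-entry revision
    # cache to shorten the chain exactly this way.
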
    @util.propertycache
    def _compressor(self):
        engine = util.compengines[self.feature_config.compression_engine]
        return engine.revlogcompressor(
            self.feature_config.compression_engine_options
        )

    @util.propertycache
    def _decompressor(self):
        """the default decompressor"""
        if self._default_compression_header is None:
            return None
        t = self._default_compression_header
        c = self._get_decompressor(t)
        return c.decompress

    def _get_decompressor(self, t):
        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor(
                    self.feature_config.compression_engine_options
                )
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(
                    _(b'unknown compression type %s') % binascii.hexlify(t)
                )
        return compressor

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return b'', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return b'', compressed

        if data[0:1] == b'\0':
            return b'', data
        return b'u', data

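    # --- Editor's illustration (not part of revlog.py): the (header, data)
    # pair above encodes how a chunk is stored, e.g.:
    #
    #     h, d = inner.compress(b'some revision text')
    #     stored = h + d  # what actually lands in the revlog
    #
    # h is b'' when d is self-describing (an engine header such as zlib's
    # 'x', or a leading '\0'); h is b'u' to tag uncompressed data, and
    # decompress() below strips that tag again.
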
    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for the "decompress compressed
        # data when encoded with common and officially supported compression
        # engines" case over "raw data" and "data encoded by less common or
        # non-official compression engines." That is why we have the inline
        # lookup first followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == b'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(
                    _(b'revlog decompress error: %s')
                    % stringutil.forcebytestr(e)
                )
        # '\0' is more common than 'u' so it goes first.
        elif t == b'\0':
            return data
        elif t == b'u':
            return util.buffer(data, 1)

        compressor = self._get_decompressor(t)

        return compressor.decompress(data)

    @contextlib.contextmanager
    def reading(self):
        """Context manager that keeps data and sidedata files open for reading"""
        if len(self.index) == 0:
            yield  # nothing to be read
        elif self._delay_buffer is not None and self.inline:
            msg = "revlog with delayed write should not be inline"
            raise error.ProgrammingError(msg)
        else:
            with self._segmentfile.reading():
                with self._segmentfile_sidedata.reading():
                    yield

    @property
    def is_writing(self):
        """True if a writing context is open"""
        return self._writinghandles is not None

    @property
    def is_open(self):
        """True if any file handle is being held

        Used for assert and debug in the python code"""
        return self._segmentfile.is_open or self._segmentfile_sidedata.is_open

    @contextlib.contextmanager
    def writing(self, transaction, data_end=None, sidedata_end=None):
        """Open the revlog files for writing

        Adding content to a revlog should be done within such a context.
        """
        if self.is_writing:
            yield
        else:
            ifh = dfh = sdfh = None
            try:
                r = len(self.index)
                # opening the data file.
                dsize = 0
                if r:
                    dsize = self.end(r - 1)
                dfh = None
                if not self.inline:
                    try:
                        dfh = self.opener(self.data_file, mode=b"r+")
                        if data_end is None:
                            dfh.seek(0, os.SEEK_END)
                        else:
                            dfh.seek(data_end, os.SEEK_SET)
                    except FileNotFoundError:
                        dfh = self.opener(self.data_file, mode=b"w+")
                        transaction.add(self.data_file, dsize)
                if self.sidedata_file is not None:
                    assert sidedata_end is not None
                    # revlog-v2 does not inline, help Pytype
                    assert dfh is not None
                    try:
                        sdfh = self.opener(self.sidedata_file, mode=b"r+")
                        dfh.seek(sidedata_end, os.SEEK_SET)
                    except FileNotFoundError:
                        sdfh = self.opener(self.sidedata_file, mode=b"w+")
                        transaction.add(self.sidedata_file, sidedata_end)

                # opening the index file.
                isize = r * self.index.entry_size
                ifh = self.__index_write_fp()
                if self.inline:
                    transaction.add(self.index_file, dsize + isize)
                else:
                    transaction.add(self.index_file, isize)
                # exposing all file handles for writing.
                self._writinghandles = (ifh, dfh, sdfh)
                self._segmentfile.writing_handle = ifh if self.inline else dfh
                self._segmentfile_sidedata.writing_handle = sdfh
                yield
            finally:
                self._writinghandles = None
                self._segmentfile.writing_handle = None
                self._segmentfile_sidedata.writing_handle = None
                if dfh is not None:
                    dfh.close()
                if sdfh is not None:
                    sdfh.close()
                # closing the index file last to avoid exposing referent to
                # potential unflushed data content.
                if ifh is not None:
                    ifh.close()

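    # --- Editor's illustration (not part of revlog.py): a minimal usage
    # sketch of the two context managers, assuming a live transaction `tr`:
    #
    #     with inner.writing(tr):
    #         ...  # append revisions while all handles are open
    #     with inner.reading():
    #         seg = inner.get_segment_for_revs(0, len(inner) - 1)
    #
    # Nesting is safe: writing() yields immediately when is_writing is
    # already true, and reading() degrades to a no-op on an empty revlog.
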
    def __index_write_fp(self, index_end=None):
        """internal method to open the index file for writing

        You should not use this directly and use `_writing` instead
        """
        try:
            if self._delay_buffer is None:
                f = self.opener(
                    self.index_file,
                    mode=b"r+",
                    checkambig=self.data_config.check_ambig,
                )
            else:
                # check_ambig affects the way we open the file for writing;
                # however, here we do not actually open a file for writing,
                # as writes will be appended to a delay_buffer. So check_ambig
                # is not meaningful and unneeded here.
                f = randomaccessfile.appender(
                    self.opener, self.index_file, b"r+", self._delay_buffer
                )
            if index_end is None:
                f.seek(0, os.SEEK_END)
            else:
                f.seek(index_end, os.SEEK_SET)
            return f
        except FileNotFoundError:
            if self._delay_buffer is None:
                return self.opener(
                    self.index_file,
                    mode=b"w+",
                    checkambig=self.data_config.check_ambig,
                )
            else:
                return randomaccessfile.appender(
                    self.opener, self.index_file, b"w+", self._delay_buffer
                )

    def __index_new_fp(self):
        """internal method to create a new index file for writing

        You should not use this unless you are upgrading from inline revlog
        """
        return self.opener(
            self.index_file,
            mode=b"w",
            checkambig=self.data_config.check_ambig,
            atomictemp=True,
        )

    def split_inline(self, tr, header, new_index_file_path=None):
        """split the data of an inline revlog into an index and a data file"""
        assert self._delay_buffer is None
        existing_handles = False
        if self._writinghandles is not None:
            existing_handles = True
            fp = self._writinghandles[0]
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None
            self._segmentfile.writing_handle = None
            # No need to deal with the sidedata writing handle as it is only
            # relevant with revlog-v2, which is never inline, so it never
            # reaches this code

        new_dfh = self.opener(self.data_file, mode=b"w+")
        new_dfh.truncate(0)  # drop any potentially existing data
        try:
            with self.reading():
                for r in range(len(self.index)):
                    new_dfh.write(self.get_segment_for_revs(r, r)[1])
                new_dfh.flush()

            if new_index_file_path is not None:
                self.index_file = new_index_file_path
            with self.__index_new_fp() as fp:
                self.inline = False
                for i in range(len(self.index)):
                    e = self.index.entry_binary(i)
                    if i == 0:
                        packed_header = self.index.pack_header(header)
                        e = packed_header + e
                    fp.write(e)

                # If we don't use side-write, the temp file replaces the real
                # index when we exit the context manager

            self._segmentfile = randomaccessfile.randomaccessfile(
                self.opener,
                self.data_file,
                self.data_config.chunk_cache_size,
            )

            if existing_handles:
                # switched from inline to conventional; reopen the index
                ifh = self.__index_write_fp()
                self._writinghandles = (ifh, new_dfh, None)
                self._segmentfile.writing_handle = new_dfh
                new_dfh = None
                # No need to deal with the sidedata writing handle as it is
                # only relevant with revlog-v2, which is never inline, so it
                # never reaches this code
        finally:
            if new_dfh is not None:
                new_dfh.close()
        return self.index_file

    def get_segment_for_revs(self, startrev, endrev):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.

        API: we should consider making this a private part of the InnerRevlog
        at some point.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self.inline:
            start += (startrev + 1) * self.index.entry_size
            end += (endrev + 1) * self.index.entry_size
        length = end - start

        return start, self._segmentfile.read_chunk(start, length)

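    # --- Editor's illustration (not part of revlog.py): a worked example of
    # the inline offset math above, with made-up numbers. In an inline
    # revlog, index entries and data interleave, so with entry_size == 64:
    #
    #     physical(rev 0) = start(0) + (0 + 1) * 64  # skip 1 index entry
    #     physical(rev 3) = start(3) + (3 + 1) * 64  # skip 4 index entries
    #
    # i.e. a revision's data sits behind every index entry written up to
    # and including its own.
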
    def _chunk(self, rev):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        if self._uncompressed_chunk_cache is not None:
            uncomp = self._uncompressed_chunk_cache.get(rev)
            if uncomp is not None:
                return uncomp

        compression_mode = self.index[rev][10]
        data = self.get_segment_for_revs(rev, rev)[1]
        if compression_mode == COMP_MODE_PLAIN:
            uncomp = data
        elif compression_mode == COMP_MODE_DEFAULT:
            uncomp = self._decompressor(data)
        elif compression_mode == COMP_MODE_INLINE:
            uncomp = self.decompress(data)
        else:
            msg = b'unknown compression mode %d'
            msg %= compression_mode
            raise error.RevlogError(msg)
        if self._uncompressed_chunk_cache is not None:
            self._uncompressed_chunk_cache.insert(rev, uncomp, cost=len(uncomp))
        return uncomp

903 def _chunks(self, revs, targetsize=None):
904 def _chunks(self, revs, targetsize=None):
904 """Obtain decompressed chunks for the specified revisions.
905 """Obtain decompressed chunks for the specified revisions.
905
906
906 Accepts an iterable of numeric revisions that are assumed to be in
907 Accepts an iterable of numeric revisions that are assumed to be in
907 ascending order. Also accepts an optional already-open file handle
908 ascending order. Also accepts an optional already-open file handle
908 to be used for reading. If used, the seek position of the file will
909 to be used for reading. If used, the seek position of the file will
909 not be preserved.
910 not be preserved.
910
911
911 This function is similar to calling ``self._chunk()`` multiple times,
912 This function is similar to calling ``self._chunk()`` multiple times,
912 but is faster.
913 but is faster.
913
914
914 Returns a list with decompressed data for each requested revision.
915 Returns a list with decompressed data for each requested revision.
915 """
916 """
916 if not revs:
917 if not revs:
917 return []
918 return []
918 start = self.start
919 start = self.start
919 length = self.length
920 length = self.length
920 inline = self.inline
921 inline = self.inline
921 iosize = self.index.entry_size
922 iosize = self.index.entry_size
922 buffer = util.buffer
923 buffer = util.buffer
923
924
924 fetched_revs = []
925 fetched_revs = []
925 fadd = fetched_revs.append
926 fadd = fetched_revs.append
926
927
927 chunks = []
928 chunks = []
928 ladd = chunks.append
929 ladd = chunks.append
929
930
930 if self._uncompressed_chunk_cache is None:
931 if self._uncompressed_chunk_cache is None:
931 fetched_revs = revs
932 fetched_revs = revs
932 else:
933 else:
933 for rev in revs:
934 for rev in revs:
934 cached_value = self._uncompressed_chunk_cache.get(rev)
935 cached_value = self._uncompressed_chunk_cache.get(rev)
935 if cached_value is None:
936 if cached_value is None:
936 fadd(rev)
937 fadd(rev)
937 else:
938 else:
938 ladd((rev, cached_value))
939 ladd((rev, cached_value))
939
940
940 if not fetched_revs:
941 if not fetched_revs:
941 slicedchunks = ()
942 slicedchunks = ()
942 elif not self.data_config.with_sparse_read:
943 elif not self.data_config.with_sparse_read:
943 slicedchunks = (fetched_revs,)
944 slicedchunks = (fetched_revs,)
944 else:
945 else:
945 slicedchunks = deltautil.slicechunk(
946 slicedchunks = deltautil.slicechunk(
946 self,
947 self,
947 fetched_revs,
948 fetched_revs,
948 targetsize=targetsize,
949 targetsize=targetsize,
949 )
950 )
950
951
951 for revschunk in slicedchunks:
952 for revschunk in slicedchunks:
952 firstrev = revschunk[0]
953 firstrev = revschunk[0]
953 # Skip trailing revisions with empty diff
954 # Skip trailing revisions with empty diff
954 for lastrev in revschunk[::-1]:
955 for lastrev in revschunk[::-1]:
955 if length(lastrev) != 0:
956 if length(lastrev) != 0:
956 break
957 break
957
958
958 try:
959 try:
959 offset, data = self.get_segment_for_revs(firstrev, lastrev)
960 offset, data = self.get_segment_for_revs(firstrev, lastrev)
960 except OverflowError:
961 except OverflowError:
961 # issue4215 - we can't cache a run of chunks greater than
962 # issue4215 - we can't cache a run of chunks greater than
962 # 2G on Windows
963 # 2G on Windows
963 for rev in revschunk:
964 for rev in revschunk:
964 ladd((rev, self._chunk(rev)))
965 ladd((rev, self._chunk(rev)))
965
966
966 decomp = self.decompress
967 decomp = self.decompress
967 # self._decompressor might be None, but will not be used in that case
968 # self._decompressor might be None, but will not be used in that case
968 def_decomp = self._decompressor
969 def_decomp = self._decompressor
969 for rev in revschunk:
970 for rev in revschunk:
970 chunkstart = start(rev)
971 chunkstart = start(rev)
971 if inline:
972 if inline:
972 chunkstart += (rev + 1) * iosize
973 chunkstart += (rev + 1) * iosize
973 chunklength = length(rev)
974 chunklength = length(rev)
974 comp_mode = self.index[rev][10]
975 comp_mode = self.index[rev][10]
975 c = buffer(data, chunkstart - offset, chunklength)
976 c = buffer(data, chunkstart - offset, chunklength)
976 if comp_mode == COMP_MODE_PLAIN:
977 if comp_mode == COMP_MODE_PLAIN:
977 c = c
978 c = c
978 elif comp_mode == COMP_MODE_INLINE:
979 elif comp_mode == COMP_MODE_INLINE:
979 c = decomp(c)
980 c = decomp(c)
980 elif comp_mode == COMP_MODE_DEFAULT:
981 elif comp_mode == COMP_MODE_DEFAULT:
981 c = def_decomp(c)
982 c = def_decomp(c)
982 else:
983 else:
983 msg = b'unknown compression mode %d'
984 msg = b'unknown compression mode %d'
984 msg %= comp_mode
985 msg %= comp_mode
985 raise error.RevlogError(msg)
986 raise error.RevlogError(msg)
986 ladd((rev, c))
987 ladd((rev, c))
987 if self._uncompressed_chunk_cache is not None:
988 if self._uncompressed_chunk_cache is not None:
988 self._uncompressed_chunk_cache.insert(rev, c, len(c))
989 self._uncompressed_chunk_cache.insert(rev, c, len(c))
989
990
990 chunks.sort()
991 chunks.sort()
991 return [x[1] for x in chunks]
992 return [x[1] for x in chunks]
992
993
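# [editor's aside: illustrative sketch, not part of revlog.py] The
# uncompressed-chunk cache consulted in _chunks() only needs `get(rev)`
# and `insert(rev, data, cost=...)` under a total-cost bound (`maxcost`).
# A minimal cost-bounded LRU honoring that contract, assuming nothing
# about Mercurial's real util.lrucachedict beyond what is used above:
from collections import OrderedDict


class CostBoundedCache:
    def __init__(self, maxcost):
        self.maxcost = maxcost
        self._cost = 0
        self._data = OrderedDict()  # key -> (value, cost), in LRU order

    def get(self, key):
        entry = self._data.get(key)
        if entry is None:
            return None
        self._data.move_to_end(key)  # mark as most recently used
        return entry[0]

    def insert(self, key, value, cost=0):
        if key in self._data:
            self._cost -= self._data.pop(key)[1]
        self._data[key] = (value, cost)
        self._cost += cost
        while self._cost > self.maxcost and len(self._data) > 1:
            _, (_, freed) = self._data.popitem(last=False)  # evict LRU
            self._cost -= freed


# usage: cache = CostBoundedCache(1 << 20); cache.insert(0, b'chunk', cost=5)
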
993 def raw_text(self, node, rev):
994 def raw_text(self, node, rev):
994 """return the possibly unvalidated rawtext for a revision
995 """return the possibly unvalidated rawtext for a revision
995
996
996 returns (rev, rawtext, validated)
997 returns (rev, rawtext, validated)
997 """
998 """
998
999
999 # revision in the cache (could be useful to apply delta)
1000 # revision in the cache (could be useful to apply delta)
1000 cachedrev = None
1001 cachedrev = None
1001 # An intermediate text to apply deltas to
1002 # An intermediate text to apply deltas to
1002 basetext = None
1003 basetext = None
1003
1004
1004 # Check if we have the entry in cache
1005 # Check if we have the entry in cache
1005 # The cache entry looks like (node, rev, rawtext)
1006 # The cache entry looks like (node, rev, rawtext)
1006 if self._revisioncache:
1007 if self._revisioncache:
1007 cachedrev = self._revisioncache[1]
1008 cachedrev = self._revisioncache[1]
1008
1009
1009 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1010 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1010 if stopped:
1011 if stopped:
1011 basetext = self._revisioncache[2]
1012 basetext = self._revisioncache[2]
1012
1013
1013 # drop cache to save memory; the caller is expected to
1014 # drop cache to save memory; the caller is expected to
1014 # update self._inner._revisioncache after validating the text
1015 # update self._inner._revisioncache after validating the text
1015 self._revisioncache = None
1016 self._revisioncache = None
1016
1017
1017 targetsize = None
1018 targetsize = None
1018 rawsize = self.index[rev][2]
1019 rawsize = self.index[rev][2]
1019 if 0 <= rawsize:
1020 if 0 <= rawsize:
1020 targetsize = 4 * rawsize
1021 targetsize = 4 * rawsize
1021
1022
1022 if self._uncompressed_chunk_cache is not None:
1023 if self._uncompressed_chunk_cache is not None:
1023 # dynamically update the uncompressed_chunk_cache size to the
1024 # dynamically update the uncompressed_chunk_cache size to the
1024 # largest revision we saw in this revlog.
1025 # largest revision we saw in this revlog.
1025 factor = self.data_config.uncompressed_cache_factor
1026 factor = self.data_config.uncompressed_cache_factor
1026 candidate_size = rawsize * factor
1027 candidate_size = rawsize * factor
1027 if candidate_size > self._uncompressed_chunk_cache.maxcost:
1028 if candidate_size > self._uncompressed_chunk_cache.maxcost:
1028 self._uncompressed_chunk_cache.maxcost = candidate_size
1029 self._uncompressed_chunk_cache.maxcost = candidate_size
1029
1030
1030 bins = self._chunks(chain, targetsize=targetsize)
1031 bins = self._chunks(chain, targetsize=targetsize)
1031 if basetext is None:
1032 if basetext is None:
1032 basetext = bytes(bins[0])
1033 basetext = bytes(bins[0])
1033 bins = bins[1:]
1034 bins = bins[1:]
1034
1035
1035 rawtext = mdiff.patches(basetext, bins)
1036 rawtext = mdiff.patches(basetext, bins)
1036 del basetext # let us have a chance to free memory early
1037 del basetext # let us have a chance to free memory early
1037 return (rev, rawtext, False)
1038 return (rev, rawtext, False)
1038
1039
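# [editor's aside: illustrative sketch, not part of revlog.py]
# raw_text() rebuilds a revision by taking one full snapshot (the start
# of the delta chain, or a cached intermediate text) and folding every
# remaining delta over it; mdiff.patches() does exactly that with
# Mercurial's binary patch encoding. A toy model of the same fold, with
# each delta reduced to a single (start, end, replacement) edit:
def apply_chain(snapshot, deltas):
    text = snapshot
    for start, end, replacement in deltas:
        text = text[:start] + replacement + text[end:]
    return text


base = b'line one\nline two\n'
chain = [
    (5, 8, b'1'),    # "line one" -> "line 1"
    (12, 15, b'2'),  # "line two" -> "line 2" (offsets in patched text)
]
assert apply_chain(base, chain) == b'line 1\nline 2\n'
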
1039 def sidedata(self, rev, sidedata_end):
1040 def sidedata(self, rev, sidedata_end):
1040 """Return the sidedata for a given revision number."""
1041 """Return the sidedata for a given revision number."""
1041 index_entry = self.index[rev]
1042 index_entry = self.index[rev]
1042 sidedata_offset = index_entry[8]
1043 sidedata_offset = index_entry[8]
1043 sidedata_size = index_entry[9]
1044 sidedata_size = index_entry[9]
1044
1045
1045 if self.inline:
1046 if self.inline:
1046 sidedata_offset += self.index.entry_size * (1 + rev)
1047 sidedata_offset += self.index.entry_size * (1 + rev)
1047 if sidedata_size == 0:
1048 if sidedata_size == 0:
1048 return {}
1049 return {}
1049
1050
1050 if sidedata_end < sidedata_offset + sidedata_size:
1051 if sidedata_end < sidedata_offset + sidedata_size:
1051 filename = self.sidedata_file
1052 filename = self.sidedata_file
1052 end = sidedata_end
1053 end = sidedata_end
1053 offset = sidedata_offset
1054 offset = sidedata_offset
1054 length = sidedata_size
1055 length = sidedata_size
1055 m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
1056 m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
1056 raise error.RevlogError(m)
1057 raise error.RevlogError(m)
1057
1058
1058 comp_segment = self._segmentfile_sidedata.read_chunk(
1059 comp_segment = self._segmentfile_sidedata.read_chunk(
1059 sidedata_offset, sidedata_size
1060 sidedata_offset, sidedata_size
1060 )
1061 )
1061
1062
1062 comp = self.index[rev][11]
1063 comp = self.index[rev][11]
1063 if comp == COMP_MODE_PLAIN:
1064 if comp == COMP_MODE_PLAIN:
1064 segment = comp_segment
1065 segment = comp_segment
1065 elif comp == COMP_MODE_DEFAULT:
1066 elif comp == COMP_MODE_DEFAULT:
1066 segment = self._decompressor(comp_segment)
1067 segment = self._decompressor(comp_segment)
1067 elif comp == COMP_MODE_INLINE:
1068 elif comp == COMP_MODE_INLINE:
1068 segment = self.decompress(comp_segment)
1069 segment = self.decompress(comp_segment)
1069 else:
1070 else:
1070 msg = b'unknown compression mode %d'
1071 msg = b'unknown compression mode %d'
1071 msg %= comp
1072 msg %= comp
1072 raise error.RevlogError(msg)
1073 raise error.RevlogError(msg)
1073
1074
1074 sidedata = sidedatautil.deserialize_sidedata(segment)
1075 sidedata = sidedatautil.deserialize_sidedata(segment)
1075 return sidedata
1076 return sidedata
1076
1077
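# [editor's aside: illustrative sketch, not part of revlog.py] The
# COMP_MODE_* dispatch above is a three-way choice: PLAIN stores bytes
# verbatim, DEFAULT defers to the one decompressor configured for the
# whole revlog, and INLINE lets each chunk carry its own compression
# marker. A schematic of that dispatch; the numeric values and the use
# of zlib for both branches are sketch-only stand-ins:
import zlib

MODE_PLAIN, MODE_DEFAULT, MODE_INLINE = 0, 1, 2


def decode_chunk(mode, payload, default_decomp=zlib.decompress):
    if mode == MODE_PLAIN:
        return payload  # stored uncompressed
    elif mode == MODE_DEFAULT:
        return default_decomp(payload)  # revlog-wide decompressor
    elif mode == MODE_INLINE:
        # a real revlog sniffs a per-chunk header here
        return zlib.decompress(payload)
    raise ValueError('unknown compression mode %d' % mode)


assert decode_chunk(MODE_DEFAULT, zlib.compress(b'data')) == b'data'
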
1077 def write_entry(
1078 def write_entry(
1078 self,
1079 self,
1079 transaction,
1080 transaction,
1080 entry,
1081 entry,
1081 data,
1082 data,
1082 link,
1083 link,
1083 offset,
1084 offset,
1084 sidedata,
1085 sidedata,
1085 sidedata_offset,
1086 sidedata_offset,
1086 index_end,
1087 index_end,
1087 data_end,
1088 data_end,
1088 sidedata_end,
1089 sidedata_end,
1089 ):
1090 ):
1090 # Files opened in a+ mode have inconsistent behavior on various
1091 # Files opened in a+ mode have inconsistent behavior on various
1091 # platforms. Windows requires that a file positioning call be made
1092 # platforms. Windows requires that a file positioning call be made
1092 # when the file handle transitions between reads and writes. See
1093 # when the file handle transitions between reads and writes. See
1093 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
1094 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
1094 # platforms, Python or the platform itself can be buggy. Some versions
1095 # platforms, Python or the platform itself can be buggy. Some versions
1095 # of Solaris have been observed to not append at the end of the file
1096 # of Solaris have been observed to not append at the end of the file
1096 # if the file had been seeked to a position before the end. See issue4943 for more.
1097 # if the file had been seeked to a position before the end. See issue4943 for more.
1097 #
1098 #
1098 # We work around this issue by inserting a seek() before writing.
1099 # We work around this issue by inserting a seek() before writing.
1099 # Note: This is likely not necessary on Python 3. However, because
1100 # Note: This is likely not necessary on Python 3. However, because
1100 # the file handle is reused for reads and may be seeked there, we need
1101 # the file handle is reused for reads and may be seeked there, we need
1101 # to be careful before changing this.
1102 # to be careful before changing this.
1102 if self._writinghandles is None:
1103 if self._writinghandles is None:
1103 msg = b'adding revision outside `revlog._writing` context'
1104 msg = b'adding revision outside `revlog._writing` context'
1104 raise error.ProgrammingError(msg)
1105 raise error.ProgrammingError(msg)
1105 ifh, dfh, sdfh = self._writinghandles
1106 ifh, dfh, sdfh = self._writinghandles
1106 if index_end is None:
1107 if index_end is None:
1107 ifh.seek(0, os.SEEK_END)
1108 ifh.seek(0, os.SEEK_END)
1108 else:
1109 else:
1109 ifh.seek(index_end, os.SEEK_SET)
1110 ifh.seek(index_end, os.SEEK_SET)
1110 if dfh:
1111 if dfh:
1111 if data_end is None:
1112 if data_end is None:
1112 dfh.seek(0, os.SEEK_END)
1113 dfh.seek(0, os.SEEK_END)
1113 else:
1114 else:
1114 dfh.seek(data_end, os.SEEK_SET)
1115 dfh.seek(data_end, os.SEEK_SET)
1115 if sdfh:
1116 if sdfh:
1116 sdfh.seek(sidedata_end, os.SEEK_SET)
1117 sdfh.seek(sidedata_end, os.SEEK_SET)
1117
1118
1118 curr = len(self.index) - 1
1119 curr = len(self.index) - 1
1119 if not self.inline:
1120 if not self.inline:
1120 transaction.add(self.data_file, offset)
1121 transaction.add(self.data_file, offset)
1121 if self.sidedata_file:
1122 if self.sidedata_file:
1122 transaction.add(self.sidedata_file, sidedata_offset)
1123 transaction.add(self.sidedata_file, sidedata_offset)
1123 transaction.add(self.canonical_index_file, curr * len(entry))
1124 transaction.add(self.canonical_index_file, curr * len(entry))
1124 if data[0]:
1125 if data[0]:
1125 dfh.write(data[0])
1126 dfh.write(data[0])
1126 dfh.write(data[1])
1127 dfh.write(data[1])
1127 if sidedata:
1128 if sidedata:
1128 sdfh.write(sidedata)
1129 sdfh.write(sidedata)
1129 if self._delay_buffer is None:
1130 if self._delay_buffer is None:
1130 ifh.write(entry)
1131 ifh.write(entry)
1131 else:
1132 else:
1132 self._delay_buffer.append(entry)
1133 self._delay_buffer.append(entry)
1133 elif self._delay_buffer is not None:
1134 elif self._delay_buffer is not None:
1134 msg = b'invalid delayed write on inline revlog'
1135 msg = b'invalid delayed write on inline revlog'
1135 raise error.ProgrammingError(msg)
1136 raise error.ProgrammingError(msg)
1136 else:
1137 else:
1137 offset += curr * self.index.entry_size
1138 offset += curr * self.index.entry_size
1138 transaction.add(self.canonical_index_file, offset)
1139 transaction.add(self.canonical_index_file, offset)
1139 assert not sidedata
1140 assert not sidedata
1140 ifh.write(entry)
1141 ifh.write(entry)
1141 ifh.write(data[0])
1142 ifh.write(data[0])
1142 ifh.write(data[1])
1143 ifh.write(data[1])
1143 return (
1144 return (
1144 ifh.tell(),
1145 ifh.tell(),
1145 dfh.tell() if dfh else None,
1146 dfh.tell() if dfh else None,
1146 sdfh.tell() if sdfh else None,
1147 sdfh.tell() if sdfh else None,
1147 )
1148 )
1148
1149
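# [editor's aside: illustrative sketch, not part of revlog.py] The
# comment opening write_entry() boils down to one discipline: never
# trust the implicit position of a handle shared between reads and
# writes; seek explicitly before every write. A minimal stdlib-only
# demonstration of that discipline:
import os
import tempfile

fd, path = tempfile.mkstemp()
os.close(fd)
with open(path, 'wb') as fh:
    fh.write(b'0123456789')

with open(path, 'r+b') as fh:
    fh.read(4)               # the handle has been used for reading...
    fh.seek(0, os.SEEK_END)  # ...so reposition explicitly, as above
    fh.write(b'appended')    # the write now lands at the true end

with open(path, 'rb') as fh:
    assert fh.read() == b'0123456789appended'
os.unlink(path)
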
1149 def _divert_index(self):
1150 def _divert_index(self):
1150 return self.index_file + b'.a'
1151 return self.index_file + b'.a'
1151
1152
1152 def delay(self):
1153 def delay(self):
1153 assert not self.is_open
1154 assert not self.is_open
1154 if self.inline:
1155 if self.inline:
1155 msg = "revlog with delayed write should not be inline"
1156 msg = "revlog with delayed write should not be inline"
1156 raise error.ProgrammingError(msg)
1157 raise error.ProgrammingError(msg)
1157 if self._delay_buffer is not None or self._orig_index_file is not None:
1158 if self._delay_buffer is not None or self._orig_index_file is not None:
1158 # delay or divert already in place
1159 # delay or divert already in place
1159 return None
1160 return None
1160 elif len(self.index) == 0:
1161 elif len(self.index) == 0:
1161 self._orig_index_file = self.index_file
1162 self._orig_index_file = self.index_file
1162 self.index_file = self._divert_index()
1163 self.index_file = self._divert_index()
1163 assert self._orig_index_file is not None
1164 assert self._orig_index_file is not None
1164 assert self.index_file is not None
1165 assert self.index_file is not None
1165 if self.opener.exists(self.index_file):
1166 if self.opener.exists(self.index_file):
1166 self.opener.unlink(self.index_file)
1167 self.opener.unlink(self.index_file)
1167 return self.index_file
1168 return self.index_file
1168 else:
1169 else:
1169 self._delay_buffer = []
1170 self._delay_buffer = []
1170 return None
1171 return None
1171
1172
1172 def write_pending(self):
1173 def write_pending(self):
1173 assert not self.is_open
1174 assert not self.is_open
1174 if self.inline:
1175 if self.inline:
1175 msg = "revlog with delayed write should not be inline"
1176 msg = "revlog with delayed write should not be inline"
1176 raise error.ProgrammingError(msg)
1177 raise error.ProgrammingError(msg)
1177 if self._orig_index_file is not None:
1178 if self._orig_index_file is not None:
1178 return None, True
1179 return None, True
1179 any_pending = False
1180 any_pending = False
1180 pending_index_file = self._divert_index()
1181 pending_index_file = self._divert_index()
1181 if self.opener.exists(pending_index_file):
1182 if self.opener.exists(pending_index_file):
1182 self.opener.unlink(pending_index_file)
1183 self.opener.unlink(pending_index_file)
1183 util.copyfile(
1184 util.copyfile(
1184 self.opener.join(self.index_file),
1185 self.opener.join(self.index_file),
1185 self.opener.join(pending_index_file),
1186 self.opener.join(pending_index_file),
1186 )
1187 )
1187 if self._delay_buffer:
1188 if self._delay_buffer:
1188 with self.opener(pending_index_file, b'r+') as ifh:
1189 with self.opener(pending_index_file, b'r+') as ifh:
1189 ifh.seek(0, os.SEEK_END)
1190 ifh.seek(0, os.SEEK_END)
1190 ifh.write(b"".join(self._delay_buffer))
1191 ifh.write(b"".join(self._delay_buffer))
1191 any_pending = True
1192 any_pending = True
1192 self._delay_buffer = None
1193 self._delay_buffer = None
1193 self._orig_index_file = self.index_file
1194 self._orig_index_file = self.index_file
1194 self.index_file = pending_index_file
1195 self.index_file = pending_index_file
1195 return self.index_file, any_pending
1196 return self.index_file, any_pending
1196
1197
1197 def finalize_pending(self):
1198 def finalize_pending(self):
1198 assert not self.is_open
1199 assert not self.is_open
1199 if self.inline:
1200 if self.inline:
1200 msg = "revlog with delayed write should not be inline"
1201 msg = "revlog with delayed write should not be inline"
1201 raise error.ProgrammingError(msg)
1202 raise error.ProgrammingError(msg)
1202
1203
1203 delay = self._delay_buffer is not None
1204 delay = self._delay_buffer is not None
1204 divert = self._orig_index_file is not None
1205 divert = self._orig_index_file is not None
1205
1206
1206 if delay and divert:
1207 if delay and divert:
1207 assert False, "unreachable"
1208 assert False, "unreachable"
1208 elif delay:
1209 elif delay:
1209 if self._delay_buffer:
1210 if self._delay_buffer:
1210 with self.opener(self.index_file, b'r+') as ifh:
1211 with self.opener(self.index_file, b'r+') as ifh:
1211 ifh.seek(0, os.SEEK_END)
1212 ifh.seek(0, os.SEEK_END)
1212 ifh.write(b"".join(self._delay_buffer))
1213 ifh.write(b"".join(self._delay_buffer))
1213 self._delay_buffer = None
1214 self._delay_buffer = None
1214 elif divert:
1215 elif divert:
1215 if self.opener.exists(self.index_file):
1216 if self.opener.exists(self.index_file):
1216 self.opener.rename(
1217 self.opener.rename(
1217 self.index_file,
1218 self.index_file,
1218 self._orig_index_file,
1219 self._orig_index_file,
1219 checkambig=True,
1220 checkambig=True,
1220 )
1221 )
1221 self.index_file = self._orig_index_file
1222 self.index_file = self._orig_index_file
1222 self._orig_index_file = None
1223 self._orig_index_file = None
1223 else:
1224 else:
1224 msg = b"not delay or divert found on this revlog"
1225 msg = b"not delay or divert found on this revlog"
1225 raise error.ProgrammingError(msg)
1226 raise error.ProgrammingError(msg)
1226 return self.canonical_index_file
1227 return self.canonical_index_file
1227
1228
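# [editor's aside: illustrative sketch, not part of revlog.py] The
# three methods above implement two modes for transactional index
# writes: "divert" (revlog was empty: write a sibling <index>.a file
# and rename it into place on commit) and "delay" (revlog existed:
# buffer new entries in memory and append them on commit). The commit
# step of finalize_pending(), reduced to its essentials; `append` and
# `rename` stand in for the opener operations used above:
def finalize(mode, delay_buffer, append, rename):
    if mode == 'delay':
        if delay_buffer:
            append(b''.join(delay_buffer))  # flush buffered entries
    elif mode == 'divert':
        rename()  # atomically swap <index>.a in for the real index
    else:
        raise ValueError('neither delaying nor diverting')


log = []
finalize('delay', [b'entry1', b'entry2'],
         append=log.append, rename=lambda: log.append(b'rename'))
assert log == [b'entry1entry2']
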
1228
1229
1229 class revlog:
1230 class revlog:
1230 """
1231 """
1231 the underlying revision storage object
1232 the underlying revision storage object
1232
1233
1233 A revlog consists of two parts, an index and the revision data.
1234 A revlog consists of two parts, an index and the revision data.
1234
1235
1235 The index is a file with a fixed record size containing
1236 The index is a file with a fixed record size containing
1236 information on each revision, including its nodeid (hash), the
1237 information on each revision, including its nodeid (hash), the
1237 nodeids of its parents, the position and offset of its data within
1238 nodeids of its parents, the position and offset of its data within
1238 the data file, and the revision it's based on. Finally, each entry
1239 the data file, and the revision it's based on. Finally, each entry
1239 contains a linkrev entry that can serve as a pointer to external
1240 contains a linkrev entry that can serve as a pointer to external
1240 data.
1241 data.
1241
1242
1242 The revision data itself is a linear collection of data chunks.
1243 The revision data itself is a linear collection of data chunks.
1243 Each chunk represents a revision and is usually represented as a
1244 Each chunk represents a revision and is usually represented as a
1244 delta against the previous chunk. To bound lookup time, runs of
1245 delta against the previous chunk. To bound lookup time, runs of
1245 deltas are limited to about 2 times the length of the original
1246 deltas are limited to about 2 times the length of the original
1246 version data. This makes retrieval of a version proportional to
1247 version data. This makes retrieval of a version proportional to
1247 its size, or O(1) relative to the number of revisions.
1248 its size, or O(1) relative to the number of revisions.
1248
1249
1249 Both pieces of the revlog are written to in an append-only
1250 Both pieces of the revlog are written to in an append-only
1250 fashion, which means we never need to rewrite a file to insert or
1251 fashion, which means we never need to rewrite a file to insert or
1251 remove data, and can use some simple techniques to avoid the need
1252 remove data, and can use some simple techniques to avoid the need
1252 for locking while reading.
1253 for locking while reading.
1253
1254
1254 If checkambig, indexfile is opened with checkambig=True at
1255 If checkambig, indexfile is opened with checkambig=True at
1255 writing, to avoid file stat ambiguity.
1256 writing, to avoid file stat ambiguity.
1256
1257
1257 If mmaplargeindex is True, and an mmapindexthreshold is set, the
1258 If mmaplargeindex is True, and an mmapindexthreshold is set, the
1258 index will be mmapped rather than read if it is larger than the
1259 index will be mmapped rather than read if it is larger than the
1259 configured threshold.
1260 configured threshold.
1260
1261
1261 If censorable is True, the revlog can have censored revisions.
1262 If censorable is True, the revlog can have censored revisions.
1262
1263
1263 If `upperboundcomp` is not None, this is the expected maximal gain from
1264 If `upperboundcomp` is not None, this is the expected maximal gain from
1264 compression for the data content.
1265 compression for the data content.
1265
1266
1266 `concurrencychecker` is an optional function that receives 3 arguments: a
1267 `concurrencychecker` is an optional function that receives 3 arguments: a
1267 file handle, a filename, and an expected position. It should check whether
1268 file handle, a filename, and an expected position. It should check whether
1268 the current position in the file handle is valid, and log/warn/fail (by
1269 the current position in the file handle is valid, and log/warn/fail (by
1269 raising).
1270 raising).
1270
1271
1271 See mercurial/revlogutils/constants.py for details about the content of an
1272 See mercurial/revlogutils/constants.py for details about the content of an
1272 index entry.
1273 index entry.
1273 """
1274 """
1274
1275
1275 _flagserrorclass = error.RevlogError
1276 _flagserrorclass = error.RevlogError
1276
1277
1277 @staticmethod
1278 @staticmethod
1278 def is_inline_index(header_bytes):
1279 def is_inline_index(header_bytes):
1279 """Determine if a revlog is inline from the initial bytes of the index"""
1280 """Determine if a revlog is inline from the initial bytes of the index"""
1280 if len(header_bytes) == 0:
1281 if len(header_bytes) == 0:
1281 return True
1282 return True
1282
1283
1283 header = INDEX_HEADER.unpack(header_bytes)[0]
1284 header = INDEX_HEADER.unpack(header_bytes)[0]
1284
1285
1285 _format_flags = header & ~0xFFFF
1286 _format_flags = header & ~0xFFFF
1286 _format_version = header & 0xFFFF
1287 _format_version = header & 0xFFFF
1287
1288
1288 features = FEATURES_BY_VERSION[_format_version]
1289 features = FEATURES_BY_VERSION[_format_version]
1289 return features[b'inline'](_format_flags)
1290 return features[b'inline'](_format_flags)
1290
1291
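# [editor's aside: illustrative sketch, not part of revlog.py]
# is_inline_index() relies on the header layout used throughout this
# file: one big-endian 32-bit integer whose low 16 bits hold the format
# version and whose high 16 bits hold feature flags. The decomposition,
# spelled out on a hand-built v1 header (constant values as in the
# imports at the top of the module):
import struct

V1 = 1            # REVLOGV1
INLINE = 1 << 16  # FLAG_INLINE_DATA

header_bytes = struct.pack('>I', V1 | INLINE)
header = struct.unpack('>I', header_bytes)[0]

format_flags = header & ~0xFFFF   # high 16 bits: feature flags
format_version = header & 0xFFFF  # low 16 bits: format version
assert format_version == V1 and (format_flags & INLINE)
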
1291 def __init__(
1292 def __init__(
1292 self,
1293 self,
1293 opener,
1294 opener,
1294 target,
1295 target,
1295 radix,
1296 radix,
1296 postfix=None, # only exist for `tmpcensored` now
1297 postfix=None, # only exist for `tmpcensored` now
1297 checkambig=False,
1298 checkambig=False,
1298 mmaplargeindex=False,
1299 mmaplargeindex=False,
1299 censorable=False,
1300 censorable=False,
1300 upperboundcomp=None,
1301 upperboundcomp=None,
1301 persistentnodemap=False,
1302 persistentnodemap=False,
1302 concurrencychecker=None,
1303 concurrencychecker=None,
1303 trypending=False,
1304 trypending=False,
1304 try_split=False,
1305 try_split=False,
1305 canonical_parent_order=True,
1306 canonical_parent_order=True,
1306 data_config=None,
1307 data_config=None,
1307 delta_config=None,
1308 delta_config=None,
1308 feature_config=None,
1309 feature_config=None,
1309 may_inline=True, # may inline new revlog
1310 may_inline=True, # may inline new revlog
1310 ):
1311 ):
1311 """
1312 """
1312 create a revlog object
1313 create a revlog object
1313
1314
1314 opener is a function that abstracts the file opening operation
1315 opener is a function that abstracts the file opening operation
1315 and can be used to implement COW semantics or the like.
1316 and can be used to implement COW semantics or the like.
1316
1317
1317 `target`: a (KIND, ID) tuple that identifies the content stored in
1318 `target`: a (KIND, ID) tuple that identifies the content stored in
1318 this revlog. It helps the rest of the code understand what the revlog
1319 this revlog. It helps the rest of the code understand what the revlog
1319 is about without having to resort to heuristics or index filename
1320 is about without having to resort to heuristics or index filename
1320 analysis. Note that this must reliably be set by normal code, but
1321 analysis. Note that this must reliably be set by normal code, but
1321 test, debug, or performance measurement code might not set it to an
1322 test, debug, or performance measurement code might not set it to an
1322 accurate value.
1323 accurate value.
1323 """
1324 """
1324
1325
1325 self.radix = radix
1326 self.radix = radix
1326
1327
1327 self._docket_file = None
1328 self._docket_file = None
1328 self._indexfile = None
1329 self._indexfile = None
1329 self._datafile = None
1330 self._datafile = None
1330 self._sidedatafile = None
1331 self._sidedatafile = None
1331 self._nodemap_file = None
1332 self._nodemap_file = None
1332 self.postfix = postfix
1333 self.postfix = postfix
1333 self._trypending = trypending
1334 self._trypending = trypending
1334 self._try_split = try_split
1335 self._try_split = try_split
1335 self._may_inline = may_inline
1336 self._may_inline = may_inline
1336 self.opener = opener
1337 self.opener = opener
1337 if persistentnodemap:
1338 if persistentnodemap:
1338 self._nodemap_file = nodemaputil.get_nodemap_file(self)
1339 self._nodemap_file = nodemaputil.get_nodemap_file(self)
1339
1340
1340 assert target[0] in ALL_KINDS
1341 assert target[0] in ALL_KINDS
1341 assert len(target) == 2
1342 assert len(target) == 2
1342 self.target = target
1343 self.target = target
1343 if feature_config is not None:
1344 if feature_config is not None:
1344 self.feature_config = feature_config.copy()
1345 self.feature_config = feature_config.copy()
1345 elif b'feature-config' in self.opener.options:
1346 elif b'feature-config' in self.opener.options:
1346 self.feature_config = self.opener.options[b'feature-config'].copy()
1347 self.feature_config = self.opener.options[b'feature-config'].copy()
1347 else:
1348 else:
1348 self.feature_config = FeatureConfig()
1349 self.feature_config = FeatureConfig()
1349 self.feature_config.censorable = censorable
1350 self.feature_config.censorable = censorable
1350 self.feature_config.canonical_parent_order = canonical_parent_order
1351 self.feature_config.canonical_parent_order = canonical_parent_order
1351 if data_config is not None:
1352 if data_config is not None:
1352 self.data_config = data_config.copy()
1353 self.data_config = data_config.copy()
1353 elif b'data-config' in self.opener.options:
1354 elif b'data-config' in self.opener.options:
1354 self.data_config = self.opener.options[b'data-config'].copy()
1355 self.data_config = self.opener.options[b'data-config'].copy()
1355 else:
1356 else:
1356 self.data_config = DataConfig()
1357 self.data_config = DataConfig()
1357 self.data_config.check_ambig = checkambig
1358 self.data_config.check_ambig = checkambig
1358 self.data_config.mmap_large_index = mmaplargeindex
1359 self.data_config.mmap_large_index = mmaplargeindex
1359 if delta_config is not None:
1360 if delta_config is not None:
1360 self.delta_config = delta_config.copy()
1361 self.delta_config = delta_config.copy()
1361 elif b'delta-config' in self.opener.options:
1362 elif b'delta-config' in self.opener.options:
1362 self.delta_config = self.opener.options[b'delta-config'].copy()
1363 self.delta_config = self.opener.options[b'delta-config'].copy()
1363 else:
1364 else:
1364 self.delta_config = DeltaConfig()
1365 self.delta_config = DeltaConfig()
1365 self.delta_config.upper_bound_comp = upperboundcomp
1366 self.delta_config.upper_bound_comp = upperboundcomp
1366
1367
1367 # Maps rev to chain base rev.
1368 # Maps rev to chain base rev.
1368 self._chainbasecache = util.lrucachedict(100)
1369 self._chainbasecache = util.lrucachedict(100)
1369
1370
1370 self.index = None
1371 self.index = None
1371 self._docket = None
1372 self._docket = None
1372 self._nodemap_docket = None
1373 self._nodemap_docket = None
1373 # Mapping of partial identifiers to full nodes.
1374 # Mapping of partial identifiers to full nodes.
1374 self._pcache = {}
1375 self._pcache = {}
1375
1376
1376 # other optional features
1377 # other optional features
1377
1378
1378 # Make copy of flag processors so each revlog instance can support
1379 # Make copy of flag processors so each revlog instance can support
1379 # custom flags.
1380 # custom flags.
1380 self._flagprocessors = dict(flagutil.flagprocessors)
1381 self._flagprocessors = dict(flagutil.flagprocessors)
1381 # prevent nesting of addgroup
1382 # prevent nesting of addgroup
1382 self._adding_group = None
1383 self._adding_group = None
1383
1384
1384 chunk_cache = self._loadindex()
1385 chunk_cache = self._loadindex()
1385 self._load_inner(chunk_cache)
1386 self._load_inner(chunk_cache)
1386 self._concurrencychecker = concurrencychecker
1387 self._concurrencychecker = concurrencychecker
1387
1388
1388 def _init_opts(self):
1389 def _init_opts(self):
1389 """process options (from above/config) to setup associated default revlog mode
1390 """process options (from above/config) to setup associated default revlog mode
1390
1391
1391 These values might be affected when actually reading on-disk information.
1392 These values might be affected when actually reading on-disk information.
1392
1393
1393 The relevant values are returned for use in _loadindex().
1394 The relevant values are returned for use in _loadindex().
1394
1395
1395 * new_header:
1396 * new_header:
1396 version header to use if we need to create a new revlog
1397 version header to use if we need to create a new revlog
1397
1398
1398 * mmapindexthreshold:
1399 * mmapindexthreshold:
1399 minimal index size at which to start using mmap
1400 minimal index size at which to start using mmap
1400
1401
1401 * force_nodemap:
1402 * force_nodemap:
1402 force the usage of a "development" version of the nodemap code
1403 force the usage of a "development" version of the nodemap code
1403 """
1404 """
1404 opts = self.opener.options
1405 opts = self.opener.options
1405
1406
1406 if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
1407 if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
1407 new_header = CHANGELOGV2
1408 new_header = CHANGELOGV2
1408 compute_rank = opts.get(b'changelogv2.compute-rank', True)
1409 compute_rank = opts.get(b'changelogv2.compute-rank', True)
1409 self.feature_config.compute_rank = compute_rank
1410 self.feature_config.compute_rank = compute_rank
1410 elif b'revlogv2' in opts:
1411 elif b'revlogv2' in opts:
1411 new_header = REVLOGV2
1412 new_header = REVLOGV2
1412 elif b'revlogv1' in opts:
1413 elif b'revlogv1' in opts:
1413 new_header = REVLOGV1
1414 new_header = REVLOGV1
1414 if self._may_inline:
1415 if self._may_inline:
1415 new_header |= FLAG_INLINE_DATA
1416 new_header |= FLAG_INLINE_DATA
1416 if b'generaldelta' in opts:
1417 if b'generaldelta' in opts:
1417 new_header |= FLAG_GENERALDELTA
1418 new_header |= FLAG_GENERALDELTA
1418 elif b'revlogv0' in self.opener.options:
1419 elif b'revlogv0' in self.opener.options:
1419 new_header = REVLOGV0
1420 new_header = REVLOGV0
1420 else:
1421 else:
1421 new_header = REVLOG_DEFAULT_VERSION
1422 new_header = REVLOG_DEFAULT_VERSION
1422
1423
1423 mmapindexthreshold = None
1424 mmapindexthreshold = None
1424 if self.data_config.mmap_large_index:
1425 if self.data_config.mmap_large_index:
1425 mmapindexthreshold = self.data_config.mmap_index_threshold
1426 mmapindexthreshold = self.data_config.mmap_index_threshold
1426 if self.feature_config.enable_ellipsis:
1427 if self.feature_config.enable_ellipsis:
1427 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
1428 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
1428
1429
1429 # revlog v0 doesn't have flag processors
1430 # revlog v0 doesn't have flag processors
1430 for flag, processor in opts.get(b'flagprocessors', {}).items():
1431 for flag, processor in opts.get(b'flagprocessors', {}).items():
1431 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
1432 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
1432
1433
1433 chunk_cache_size = self.data_config.chunk_cache_size
1434 chunk_cache_size = self.data_config.chunk_cache_size
1434 if chunk_cache_size <= 0:
1435 if chunk_cache_size <= 0:
1435 raise error.RevlogError(
1436 raise error.RevlogError(
1436 _(b'revlog chunk cache size %r is not greater than 0')
1437 _(b'revlog chunk cache size %r is not greater than 0')
1437 % chunk_cache_size
1438 % chunk_cache_size
1438 )
1439 )
1439 elif chunk_cache_size & (chunk_cache_size - 1):
1440 elif chunk_cache_size & (chunk_cache_size - 1):
1440 raise error.RevlogError(
1441 raise error.RevlogError(
1441 _(b'revlog chunk cache size %r is not a power of 2')
1442 _(b'revlog chunk cache size %r is not a power of 2')
1442 % chunk_cache_size
1443 % chunk_cache_size
1443 )
1444 )
1444 force_nodemap = opts.get(b'devel-force-nodemap', False)
1445 force_nodemap = opts.get(b'devel-force-nodemap', False)
1445 return new_header, mmapindexthreshold, force_nodemap
1446 return new_header, mmapindexthreshold, force_nodemap
1446
1447
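# [editor's aside: illustrative sketch, not part of revlog.py] The
# chunk-cache-size validation in _init_opts() uses the classic bit
# trick: for n > 0, `n & (n - 1)` is zero exactly when n is a power of
# two, since a power of two has a single set bit that the subtraction
# clears:
def is_power_of_two(n):
    return n > 0 and (n & (n - 1)) == 0


assert is_power_of_two(65536)
assert not is_power_of_two(65535)
assert not is_power_of_two(0)
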
1447 def _get_data(self, filepath, mmap_threshold, size=None):
1448 def _get_data(self, filepath, mmap_threshold, size=None):
1448 """return a file content with or without mmap
1449 """return a file content with or without mmap
1449
1450
1450 If the file is missing return the empty string"""
1451 If the file is missing return the empty string"""
1451 try:
1452 try:
1452 with self.opener(filepath) as fp:
1453 with self.opener(filepath) as fp:
1453 if mmap_threshold is not None:
1454 if mmap_threshold is not None:
1454 file_size = self.opener.fstat(fp).st_size
1455 file_size = self.opener.fstat(fp).st_size
1455 if file_size >= mmap_threshold:
1456 if file_size >= mmap_threshold:
1456 if size is not None:
1457 if size is not None:
1457 # avoid potential mmap crash
1458 # avoid potential mmap crash
1458 size = min(file_size, size)
1459 size = min(file_size, size)
1459 # TODO: should .close() to release resources without
1460 # TODO: should .close() to release resources without
1460 # relying on Python GC
1461 # relying on Python GC
1461 if size is None:
1462 if size is None:
1462 return util.buffer(util.mmapread(fp))
1463 return util.buffer(util.mmapread(fp))
1463 else:
1464 else:
1464 return util.buffer(util.mmapread(fp, size))
1465 return util.buffer(util.mmapread(fp, size))
1465 if size is None:
1466 if size is None:
1466 return fp.read()
1467 return fp.read()
1467 else:
1468 else:
1468 return fp.read(size)
1469 return fp.read(size)
1469 except FileNotFoundError:
1470 except FileNotFoundError:
1470 return b''
1471 return b''
1471
1472
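# [editor's aside: illustrative sketch, not part of revlog.py]
# _get_data() picks between a plain read() and mmap on a size
# threshold: small files are cheaper to slurp, large ones cheaper to
# map. The same decision with the standard library standing in for
# Mercurial's util.mmapread:
import mmap
import os


def read_maybe_mmap(opener, path, mmap_threshold=None):
    # `opener` is any callable returning a binary file object
    try:
        with opener(path) as fp:
            if (mmap_threshold is not None
                    and os.fstat(fp.fileno()).st_size >= mmap_threshold):
                # hand back a buffer over the mapped file
                return mmap.mmap(fp.fileno(), 0, access=mmap.ACCESS_READ)
            return fp.read()
    except FileNotFoundError:
        return b''  # a missing revlog file reads as empty, as above


# usage: read_maybe_mmap(lambda p: open(p, 'rb'), 'some.i', 1 << 20)
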
1472 def get_streams(self, max_linkrev, force_inline=False):
1473 def get_streams(self, max_linkrev, force_inline=False):
1473 """return a list of streams that represent this revlog
1474 """return a list of streams that represent this revlog
1474
1475
1475 This is used by stream-clone to do bytes to bytes copies of a repository.
1476 This is used by stream-clone to do bytes to bytes copies of a repository.
1476
1477
1477 This streams data for all revisions that refer to a changelog revision up
1478 This streams data for all revisions that refer to a changelog revision up
1478 to `max_linkrev`.
1479 to `max_linkrev`.
1479
1480
1480 If `force_inline` is set, it enforces that the stream will represent an inline revlog.
1481 If `force_inline` is set, it enforces that the stream will represent an inline revlog.
1481
1482
1482 It returns a list of three-tuples:
1483 It returns a list of three-tuples:
1483
1484
1484 [
1485 [
1485 (filename, bytes_stream, stream_size),
1486 (filename, bytes_stream, stream_size),
1486
1487
1487 ]
1488 ]
1488 """
1489 """
1489 n = len(self)
1490 n = len(self)
1490 index = self.index
1491 index = self.index
1491 while n > 0:
1492 while n > 0:
1492 linkrev = index[n - 1][4]
1493 linkrev = index[n - 1][4]
1493 if linkrev < max_linkrev:
1494 if linkrev < max_linkrev:
1494 break
1495 break
1495 # note: this loop will rarely go through multiple iterations, since
1496 # note: this loop will rarely go through multiple iterations, since
1496 # it only traverses commits created during the current streaming
1497 # it only traverses commits created during the current streaming
1497 # pull operation.
1498 # pull operation.
1498 #
1499 #
1499 # If this becomes a problem, using a binary search should cap the
1500 # If this becomes a problem, using a binary search should cap the
1500 # runtime of this.
1501 # runtime of this.
1501 n = n - 1
1502 n = n - 1
1502 if n == 0:
1503 if n == 0:
1503 # no data to send
1504 # no data to send
1504 return []
1505 return []
1505 index_size = n * index.entry_size
1506 index_size = n * index.entry_size
1506 data_size = self.end(n - 1)
1507 data_size = self.end(n - 1)
1507
1508
1508 # XXX we might have been split (or stripped) since the object
1509 # XXX we might have been split (or stripped) since the object
1509 # initialization. We need to close this race too, e.g. by having a way
1510 # initialization. We need to close this race too, e.g. by having a way
1510 # to pre-open the files we feed to the revlog and never closing them
1511 # to pre-open the files we feed to the revlog and never closing them
1511 # before we are done streaming.
1512 # before we are done streaming.
1512
1513
1513 if self._inline:
1514 if self._inline:
1514
1515
1515 def get_stream():
1516 def get_stream():
1516 with self.opener(self._indexfile, mode=b"r") as fp:
1517 with self.opener(self._indexfile, mode=b"r") as fp:
1517 yield None
1518 yield None
1518 size = index_size + data_size
1519 size = index_size + data_size
1519 if size <= 65536:
1520 if size <= 65536:
1520 yield fp.read(size)
1521 yield fp.read(size)
1521 else:
1522 else:
1522 yield from util.filechunkiter(fp, limit=size)
1523 yield from util.filechunkiter(fp, limit=size)
1523
1524
1524 inline_stream = get_stream()
1525 inline_stream = get_stream()
1525 next(inline_stream)
1526 next(inline_stream)
1526 return [
1527 return [
1527 (self._indexfile, inline_stream, index_size + data_size),
1528 (self._indexfile, inline_stream, index_size + data_size),
1528 ]
1529 ]
1529 elif force_inline:
1530 elif force_inline:
1530
1531
1531 def get_stream():
1532 def get_stream():
1532 with self.reading():
1533 with self.reading():
1533 yield None
1534 yield None
1534
1535
1535 for rev in range(n):
1536 for rev in range(n):
1536 idx = self.index.entry_binary(rev)
1537 idx = self.index.entry_binary(rev)
1537 if rev == 0 and self._docket is None:
1538 if rev == 0 and self._docket is None:
1538 # re-inject the inline flag
1539 # re-inject the inline flag
1539 header = self._format_flags
1540 header = self._format_flags
1540 header |= self._format_version
1541 header |= self._format_version
1541 header |= FLAG_INLINE_DATA
1542 header |= FLAG_INLINE_DATA
1542 header = self.index.pack_header(header)
1543 header = self.index.pack_header(header)
1543 idx = header + idx
1544 idx = header + idx
1544 yield idx
1545 yield idx
1545 yield self._inner.get_segment_for_revs(rev, rev)[1]
1546 yield self._inner.get_segment_for_revs(rev, rev)[1]
1546
1547
1547 inline_stream = get_stream()
1548 inline_stream = get_stream()
1548 next(inline_stream)
1549 next(inline_stream)
1549 return [
1550 return [
1550 (self._indexfile, inline_stream, index_size + data_size),
1551 (self._indexfile, inline_stream, index_size + data_size),
1551 ]
1552 ]
1552 else:
1553 else:
1553
1554
1554 def get_index_stream():
1555 def get_index_stream():
1555 with self.opener(self._indexfile, mode=b"r") as fp:
1556 with self.opener(self._indexfile, mode=b"r") as fp:
1556 yield None
1557 yield None
1557 if index_size <= 65536:
1558 if index_size <= 65536:
1558 yield fp.read(index_size)
1559 yield fp.read(index_size)
1559 else:
1560 else:
1560 yield from util.filechunkiter(fp, limit=index_size)
1561 yield from util.filechunkiter(fp, limit=index_size)
1561
1562
1562 def get_data_stream():
1563 def get_data_stream():
1563 with self._datafp() as fp:
1564 with self._datafp() as fp:
1564 yield None
1565 yield None
1565 if data_size <= 65536:
1566 if data_size <= 65536:
1566 yield fp.read(data_size)
1567 yield fp.read(data_size)
1567 else:
1568 else:
1568 yield from util.filechunkiter(fp, limit=data_size)
1569 yield from util.filechunkiter(fp, limit=data_size)
1569
1570
1570 index_stream = get_index_stream()
1571 index_stream = get_index_stream()
1571 next(index_stream)
1572 next(index_stream)
1572 data_stream = get_data_stream()
1573 data_stream = get_data_stream()
1573 next(data_stream)
1574 next(data_stream)
1574 return [
1575 return [
1575 (self._datafile, data_stream, data_size),
1576 (self._datafile, data_stream, data_size),
1576 (self._indexfile, index_stream, index_size),
1577 (self._indexfile, index_stream, index_size),
1577 ]
1578 ]
1578
1579
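# [editor's aside: illustrative sketch, not part of revlog.py] Every
# stream built above follows the same idiom: the generator opens its
# file, yields None once, and only then yields data, and the caller
# primes it with next() right away. Because a generator body only runs
# on the first next(), the priming step is what pins the file open at
# stream-creation time instead of at first read. The bare pattern:
def make_stream(path, limit, chunk_size=65536):
    with open(path, 'rb') as fp:  # runs on the priming next()
        yield None                # priming point: file is now held open
        remaining = limit
        while remaining > 0:
            chunk = fp.read(min(chunk_size, remaining))
            if not chunk:
                break
            remaining -= len(chunk)
            yield chunk


# stream = make_stream('file.d', size); next(stream)  # prime: open now
# payload = b''.join(stream)                          # drain later
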
1579 def _loadindex(self, docket=None):
1580 def _loadindex(self, docket=None):
1580
1581
1581 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
1582 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
1582
1583
1583 if self.postfix is not None:
1584 if self.postfix is not None:
1584 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
1585 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
1585 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
1586 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
1586 entry_point = b'%s.i.a' % self.radix
1587 entry_point = b'%s.i.a' % self.radix
1587 elif self._try_split and self.opener.exists(self._split_index_file):
1588 elif self._try_split and self.opener.exists(self._split_index_file):
1588 entry_point = self._split_index_file
1589 entry_point = self._split_index_file
1589 else:
1590 else:
1590 entry_point = b'%s.i' % self.radix
1591 entry_point = b'%s.i' % self.radix
1591
1592
1592 if docket is not None:
1593 if docket is not None:
1593 self._docket = docket
1594 self._docket = docket
1594 self._docket_file = entry_point
1595 self._docket_file = entry_point
1595 else:
1596 else:
1596 self._initempty = True
1597 self._initempty = True
1597 entry_data = self._get_data(entry_point, mmapindexthreshold)
1598 entry_data = self._get_data(entry_point, mmapindexthreshold)
1598 if len(entry_data) > 0:
1599 if len(entry_data) > 0:
1599 header = INDEX_HEADER.unpack(entry_data[:4])[0]
1600 header = INDEX_HEADER.unpack(entry_data[:4])[0]
1600 self._initempty = False
1601 self._initempty = False
1601 else:
1602 else:
1602 header = new_header
1603 header = new_header
1603
1604
1604 self._format_flags = header & ~0xFFFF
1605 self._format_flags = header & ~0xFFFF
1605 self._format_version = header & 0xFFFF
1606 self._format_version = header & 0xFFFF
1606
1607
1607 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
1608 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
1608 if supported_flags is None:
1609 if supported_flags is None:
1609 msg = _(b'unknown version (%d) in revlog %s')
1610 msg = _(b'unknown version (%d) in revlog %s')
1610 msg %= (self._format_version, self.display_id)
1611 msg %= (self._format_version, self.display_id)
1611 raise error.RevlogError(msg)
1612 raise error.RevlogError(msg)
1612 elif self._format_flags & ~supported_flags:
1613 elif self._format_flags & ~supported_flags:
1613 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
1614 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
1614 display_flag = self._format_flags >> 16
1615 display_flag = self._format_flags >> 16
1615 msg %= (display_flag, self._format_version, self.display_id)
1616 msg %= (display_flag, self._format_version, self.display_id)
1616 raise error.RevlogError(msg)
1617 raise error.RevlogError(msg)
1617
1618
1618 features = FEATURES_BY_VERSION[self._format_version]
1619 features = FEATURES_BY_VERSION[self._format_version]
1619 self._inline = features[b'inline'](self._format_flags)
1620 self._inline = features[b'inline'](self._format_flags)
1620 self.delta_config.general_delta = features[b'generaldelta'](
1621 self.delta_config.general_delta = features[b'generaldelta'](
1621 self._format_flags
1622 self._format_flags
1622 )
1623 )
1623 self.feature_config.has_side_data = features[b'sidedata']
1624 self.feature_config.has_side_data = features[b'sidedata']
1624
1625
1625 if not features[b'docket']:
1626 if not features[b'docket']:
1626 self._indexfile = entry_point
1627 self._indexfile = entry_point
1627 index_data = entry_data
1628 index_data = entry_data
1628 else:
1629 else:
1629 self._docket_file = entry_point
1630 self._docket_file = entry_point
1630 if self._initempty:
1631 if self._initempty:
1631 self._docket = docketutil.default_docket(self, header)
1632 self._docket = docketutil.default_docket(self, header)
1632 else:
1633 else:
1633 self._docket = docketutil.parse_docket(
1634 self._docket = docketutil.parse_docket(
1634 self, entry_data, use_pending=self._trypending
1635 self, entry_data, use_pending=self._trypending
1635 )
1636 )
1636
1637
1637 if self._docket is not None:
1638 if self._docket is not None:
1638 self._indexfile = self._docket.index_filepath()
1639 self._indexfile = self._docket.index_filepath()
1639 index_data = b''
1640 index_data = b''
1640 index_size = self._docket.index_end
1641 index_size = self._docket.index_end
1641 if index_size > 0:
1642 if index_size > 0:
1642 index_data = self._get_data(
1643 index_data = self._get_data(
1643 self._indexfile, mmapindexthreshold, size=index_size
1644 self._indexfile, mmapindexthreshold, size=index_size
1644 )
1645 )
1645 if len(index_data) < index_size:
1646 if len(index_data) < index_size:
1646 msg = _(b'not enough index data for %s: got %d, expected %d')
1647 msg = _(b'not enough index data for %s: got %d, expected %d')
1647 msg %= (self.display_id, len(index_data), index_size)
1648 msg %= (self.display_id, len(index_data), index_size)
1648 raise error.RevlogError(msg)
1649 raise error.RevlogError(msg)
1649
1650
1650 self._inline = False
1651 self._inline = False
1651 # generaldelta implied by version 2 revlogs.
1652 # generaldelta implied by version 2 revlogs.
1652 self.delta_config.general_delta = True
1653 self.delta_config.general_delta = True
1653 # the logic for persistent nodemap will be dealt with within the
1654 # the logic for persistent nodemap will be dealt with within the
1654 # main docket, so disable it for now.
1655 # main docket, so disable it for now.
1655 self._nodemap_file = None
1656 self._nodemap_file = None
1656
1657
1657 if self._docket is not None:
1658 if self._docket is not None:
1658 self._datafile = self._docket.data_filepath()
1659 self._datafile = self._docket.data_filepath()
1659 self._sidedatafile = self._docket.sidedata_filepath()
1660 self._sidedatafile = self._docket.sidedata_filepath()
1660 elif self.postfix is None:
1661 elif self.postfix is None:
1661 self._datafile = b'%s.d' % self.radix
1662 self._datafile = b'%s.d' % self.radix
1662 else:
1663 else:
1663 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
1664 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
1664
1665
1665 self.nodeconstants = sha1nodeconstants
1666 self.nodeconstants = sha1nodeconstants
1666 self.nullid = self.nodeconstants.nullid
1667 self.nullid = self.nodeconstants.nullid
1667
1668
1668 # sparse-revlog can't be on without general-delta (issue6056)
1669 # sparse-revlog can't be on without general-delta (issue6056)
1669 if not self.delta_config.general_delta:
1670 if not self.delta_config.general_delta:
1670 self.delta_config.sparse_revlog = False
1671 self.delta_config.sparse_revlog = False
1671
1672
1672 self._storedeltachains = True
1673 self._storedeltachains = True
1673
1674
1674 devel_nodemap = (
1675 devel_nodemap = (
1675 self._nodemap_file
1676 self._nodemap_file
1676 and force_nodemap
1677 and force_nodemap
1677 and parse_index_v1_nodemap is not None
1678 and parse_index_v1_nodemap is not None
1678 )
1679 )
1679
1680
1680 use_rust_index = False
1681 use_rust_index = False
1681 if rustrevlog is not None:
1682 if rustrevlog is not None:
1682 if self._nodemap_file is not None:
1683 if self._nodemap_file is not None:
1683 use_rust_index = True
1684 use_rust_index = True
1684 else:
1685 else:
1685 use_rust_index = self.opener.options.get(b'rust.index')
1686 use_rust_index = self.opener.options.get(b'rust.index')
1686
1687
1687 self._parse_index = parse_index_v1
1688 self._parse_index = parse_index_v1
1688 if self._format_version == REVLOGV0:
1689 if self._format_version == REVLOGV0:
1689 self._parse_index = revlogv0.parse_index_v0
1690 self._parse_index = revlogv0.parse_index_v0
1690 elif self._format_version == REVLOGV2:
1691 elif self._format_version == REVLOGV2:
1691 self._parse_index = parse_index_v2
1692 self._parse_index = parse_index_v2
1692 elif self._format_version == CHANGELOGV2:
1693 elif self._format_version == CHANGELOGV2:
1693 self._parse_index = parse_index_cl_v2
1694 self._parse_index = parse_index_cl_v2
1694 elif devel_nodemap:
1695 elif devel_nodemap:
1695 self._parse_index = parse_index_v1_nodemap
1696 self._parse_index = parse_index_v1_nodemap
1696 elif use_rust_index:
1697 elif use_rust_index:
1697 self._parse_index = parse_index_v1_mixed
1698 self._parse_index = functools.partial(
1699 parse_index_v1_mixed, default_header=new_header
1700 )
1698 try:
1701 try:
1699 d = self._parse_index(index_data, self._inline)
1702 d = self._parse_index(index_data, self._inline)
1700 index, chunkcache = d
1703 index, chunkcache = d
1701 use_nodemap = (
1704 use_nodemap = (
1702 not self._inline
1705 not self._inline
1703 and self._nodemap_file is not None
1706 and self._nodemap_file is not None
1704 and hasattr(index, 'update_nodemap_data')
1707 and hasattr(index, 'update_nodemap_data')
1705 )
1708 )
1706 if use_nodemap:
1709 if use_nodemap:
1707 nodemap_data = nodemaputil.persisted_data(self)
1710 nodemap_data = nodemaputil.persisted_data(self)
1708 if nodemap_data is not None:
1711 if nodemap_data is not None:
1709 docket = nodemap_data[0]
1712 docket = nodemap_data[0]
1710 if (
1713 if (
1711 len(d[0]) > docket.tip_rev
1714 len(d[0]) > docket.tip_rev
1712 and d[0][docket.tip_rev][7] == docket.tip_node
1715 and d[0][docket.tip_rev][7] == docket.tip_node
1713 ):
1716 ):
1714 # no changelog tampering
1717 # no changelog tampering
1715 self._nodemap_docket = docket
1718 self._nodemap_docket = docket
1716 index.update_nodemap_data(*nodemap_data)
1719 index.update_nodemap_data(*nodemap_data)
1717 except (ValueError, IndexError):
1720 except (ValueError, IndexError):
1718 raise error.RevlogError(
1721 raise error.RevlogError(
1719 _(b"index %s is corrupted") % self.display_id
1722 _(b"index %s is corrupted") % self.display_id
1720 )
1723 )
1721 self.index = index
1724 self.index = index
1722 # revnum -> (chain-length, sum-delta-length)
1725 # revnum -> (chain-length, sum-delta-length)
1723 self._chaininfocache = util.lrucachedict(500)
1726 self._chaininfocache = util.lrucachedict(500)
1724
1727
1725 return chunkcache
1728 return chunkcache
1726
1729
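# [editor's aside: illustrative sketch, not part of revlog.py] The
# change in this revision replaces the bare `parse_index_v1_mixed`
# reference with functools.partial so the Rust-backed parser also
# receives the header to use when the on-disk index is new or empty.
# The mechanism, reduced to a stand-in parser (not Mercurial's real
# signature):
import functools
import struct


def parse_index(data, inline, default_header=None):
    header = struct.unpack('>I', data[:4])[0] if data else default_header
    return header, inline


parse = functools.partial(parse_index, default_header=0xCAFE)
assert parse(b'', False) == (0xCAFE, False)                # empty: default
assert parse(struct.pack('>I', 7) + b'...', True)[0] == 7  # data wins
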
1727 def _load_inner(self, chunk_cache):
1730 def _load_inner(self, chunk_cache):
1728 if self._docket is None:
1731 if self._docket is None:
1729 default_compression_header = None
1732 default_compression_header = None
1730 else:
1733 else:
1731 default_compression_header = self._docket.default_compression_header
1734 default_compression_header = self._docket.default_compression_header
1732
1735
1733 self._inner = _InnerRevlog(
1736 self._inner = _InnerRevlog(
1734 opener=self.opener,
1737 opener=self.opener,
1735 index=self.index,
1738 index=self.index,
1736 index_file=self._indexfile,
1739 index_file=self._indexfile,
1737 data_file=self._datafile,
1740 data_file=self._datafile,
1738 sidedata_file=self._sidedatafile,
1741 sidedata_file=self._sidedatafile,
1739 inline=self._inline,
1742 inline=self._inline,
1740 data_config=self.data_config,
1743 data_config=self.data_config,
1741 delta_config=self.delta_config,
1744 delta_config=self.delta_config,
1742 feature_config=self.feature_config,
1745 feature_config=self.feature_config,
1743 chunk_cache=chunk_cache,
1746 chunk_cache=chunk_cache,
1744 default_compression_header=default_compression_header,
1747 default_compression_header=default_compression_header,
1745 )
1748 )
1746
1749
1747 def get_revlog(self):
1750 def get_revlog(self):
1748 """simple function to mirror API of other not-really-revlog API"""
1751 """simple function to mirror API of other not-really-revlog API"""
1749 return self
1752 return self
1750
1753
1751 @util.propertycache
1754 @util.propertycache
1752 def revlog_kind(self):
1755 def revlog_kind(self):
1753 return self.target[0]
1756 return self.target[0]
1754
1757
1755 @util.propertycache
1758 @util.propertycache
1756 def display_id(self):
1759 def display_id(self):
1757 """The public facing "ID" of the revlog that we use in message"""
1760 """The public facing "ID" of the revlog that we use in message"""
1758 if self.revlog_kind == KIND_FILELOG:
1761 if self.revlog_kind == KIND_FILELOG:
1759 # Reference the file without the "data/" prefix, so it is familiar
1762 # Reference the file without the "data/" prefix, so it is familiar
1760 # to the user.
1763 # to the user.
1761 return self.target[1]
1764 return self.target[1]
1762 else:
1765 else:
1763 return self.radix
1766 return self.radix
1764
1767
1765 def _datafp(self, mode=b'r'):
1768 def _datafp(self, mode=b'r'):
1766 """file object for the revlog's data file"""
1769 """file object for the revlog's data file"""
1767 return self.opener(self._datafile, mode=mode)
1770 return self.opener(self._datafile, mode=mode)
1768
1771
1769 def tiprev(self):
1772 def tiprev(self):
1770 return len(self.index) - 1
1773 return len(self.index) - 1
1771
1774
1772 def tip(self):
1775 def tip(self):
1773 return self.node(self.tiprev())
1776 return self.node(self.tiprev())
1774
1777
1775 def __contains__(self, rev):
1778 def __contains__(self, rev):
1776 return 0 <= rev < len(self)
1779 return 0 <= rev < len(self)
1777
1780
1778 def __len__(self):
1781 def __len__(self):
1779 return len(self.index)
1782 return len(self.index)
1780
1783
1781 def __iter__(self):
1784 def __iter__(self):
1782 return iter(range(len(self)))
1785 return iter(range(len(self)))
1783
1786
1784 def revs(self, start=0, stop=None):
1787 def revs(self, start=0, stop=None):
1785 """iterate over all rev in this revlog (from start to stop)"""
1788 """iterate over all rev in this revlog (from start to stop)"""
1786 return storageutil.iterrevs(len(self), start=start, stop=stop)
1789 return storageutil.iterrevs(len(self), start=start, stop=stop)
1787
1790
1788 def hasnode(self, node):
1791 def hasnode(self, node):
1789 try:
1792 try:
1790 self.rev(node)
1793 self.rev(node)
1791 return True
1794 return True
1792 except KeyError:
1795 except KeyError:
1793 return False
1796 return False
1794
1797
1795 def _candelta(self, baserev, rev):
1798 def _candelta(self, baserev, rev):
1796 """whether two revisions (baserev, rev) can be delta-ed or not"""
1799 """whether two revisions (baserev, rev) can be delta-ed or not"""
1797 # Disable delta if either rev requires a content-changing flag
1800 # Disable delta if either rev requires a content-changing flag
1798 # processor (ex. LFS). This is because such a flag processor can alter
1801 # processor (ex. LFS). This is because such a flag processor can alter
1799 # the rawtext content that the delta will be based on, and two clients
1802 # the rawtext content that the delta will be based on, and two clients
1800 # could have the same revlog node with different flags (i.e. different
1803 # could have the same revlog node with different flags (i.e. different
1801 # rawtext contents) and the delta could be incompatible.
1804 # rawtext contents) and the delta could be incompatible.
1802 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
1805 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
1803 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
1806 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
1804 ):
1807 ):
1805 return False
1808 return False
1806 return True
1809 return True
1807
1810
1808 def update_caches(self, transaction):
1811 def update_caches(self, transaction):
1809 """update on disk cache
1812 """update on disk cache
1810
1813
1811 If a transaction is passed, the update may be delayed to transaction
1814 If a transaction is passed, the update may be delayed to transaction
1812 commit."""
1815 commit."""
1813 if self._nodemap_file is not None:
1816 if self._nodemap_file is not None:
1814 if transaction is None:
1817 if transaction is None:
1815 nodemaputil.update_persistent_nodemap(self)
1818 nodemaputil.update_persistent_nodemap(self)
1816 else:
1819 else:
1817 nodemaputil.setup_persistent_nodemap(transaction, self)
1820 nodemaputil.setup_persistent_nodemap(transaction, self)
1818
1821
    def clearcaches(self):
        """Clear in-memory caches"""
        self._chainbasecache.clear()
        self._inner.clear_cache()
        self._pcache = {}
        self._nodemap_docket = None
        self.index.clearcaches()
        # The Python code is responsible for validating the docket, so we
        # end up having to refresh it here.
        use_nodemap = (
            not self._inline
            and self._nodemap_file is not None
            and hasattr(self.index, 'update_nodemap_data')
        )
        if use_nodemap:
            nodemap_data = nodemaputil.persisted_data(self)
            if nodemap_data is not None:
                self._nodemap_docket = nodemap_data[0]
                self.index.update_nodemap_data(*nodemap_data)

    def rev(self, node):
        """return the revision number associated with a <nodeid>"""
        try:
            return self.index.rev(node)
        except TypeError:
            raise
        except error.RevlogError:
            # parsers.c radix tree lookup failed
            if (
                node == self.nodeconstants.wdirid
                or node in self.nodeconstants.wdirfilenodeids
            ):
                raise error.WdirUnsupported
            raise error.LookupError(node, self.display_id, _(b'no node'))

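    # Editorial sketch: ``rev`` and ``node`` are inverse mappings between
    # binary node ids and integer revision numbers. Assuming ``rl`` is an
    # open revlog and ``some_node`` a known node id:
    #
    #     r = rl.rev(some_node)       # LookupError if the node is unknown
    #     assert rl.node(r) == some_node
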
    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def sidedata_cut_off(self, rev):
        sd_cut_off = self.index[rev][8]
        if sd_cut_off != 0:
            return sd_cut_off
        # This is some annoying dance, because entries without sidedata
        # currently use 0 as their offset. (instead of previous-offset +
        # previous-size)
        #
        # We should reconsider this sidedata → 0 sidedata_offset policy.
        # In the meantime, we need this.
        while 0 <= rev:
            e = self.index[rev]
            if e[9] != 0:
                return e[8] + e[9]
            rev -= 1
        return 0

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]

    def sidedata_length(self, rev):
        if not self.feature_config.has_side_data:
            return 0
        return self.index[rev][9]

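    # Editorial sketch: ``start`` and ``flags`` unpack the first index-tuple
    # field, which packs the data-file offset into the high 48 bits and the
    # flag bits into the low 16 bits. Building such a field by hand:
    #
    #     packed = (offset << 16) | flag_bits
    #     assert packed >> 16 == offset          # what start() returns
    #     assert packed & 0xFFFF == flag_bits    # what flags() returns
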
    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.rawdata(rev)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev))

    def fast_rank(self, rev):
        """Return the rank of a revision if already known, or None otherwise.

        The rank of a revision is the size of the sub-graph it defines as a
        head. Equivalently, the rank of a revision `r` is the size of the set
        `ancestors(r)`, `r` included.

        This method returns the rank retrieved from the revlog in constant
        time. It makes no attempt at computing unknown values for versions of
        the revlog which do not persist the rank.
        """
        rank = self.index[rev][ENTRY_RANK]
        if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
            return None
        if rev == nullrev:
            return 0  # convention
        return rank

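    # Editorial sketch: per the docstring above, the rank counts a revision
    # and all of its ancestors, so when a changelog-v2 revlog has persisted
    # it, the value should agree with a full (slow) traversal:
    #
    #     rank = rl.fast_rank(r)
    #     if rank is not None:    # None when the format does not store ranks
    #         assert rank == sum(1 for _ in rl.ancestors([r], inclusive=True))
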
    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

        if self.feature_config.canonical_parent_order and entry[5] == nullrev:
            return entry[6], entry[5]
        else:
            return entry[5], entry[6]

    # fast parentrevs(rev) where rev isn't filtered
    _uncheckedparentrevs = parentrevs

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        # inline node() to avoid function call overhead
        if self.feature_config.canonical_parent_order and d[5] == self.nullid:
            return i[d[6]][7], i[d[5]][7]
        else:
            return i[d[5]][7], i[d[6]][7]

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self.delta_config.general_delta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

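    # Editorial sketch: the chain accessors compose into a quick delta-chain
    # summary for a revision:
    #
    #     base = rl.chainbase(rev)        # first revision of the delta chain
    #     depth = rl.chainlen(rev)        # number of deltas to apply on reads
    #     _, stored = rl._chaininfo(rev)  # compressed bytes along the chain
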
    def _deltachain(self, rev, stoprev=None):
        return self._inner._deltachain(rev, stoprev=stoprev)

    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse revision order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        # first, make sure start revisions aren't filtered
        revs = list(revs)
        checkrev = self.node
        for r in revs:
            checkrev(r)
        # and we're sure ancestors aren't filtered as well

        if rustancestor is not None and self.index.rust_ext_compat:
            lazyancestors = rustancestor.LazyAncestors
            arg = self.index
        else:
            lazyancestors = ancestor.lazyancestors
            arg = self._uncheckedparentrevs
        return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)

    def descendants(self, revs):
        return dagop.descendantrevs(revs, self.revs, self.parentrevs)

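    # Editorial sketch: ``ancestors`` yields revision numbers in reverse
    # revision order and can be bounded from below:
    #
    #     for arev in rl.ancestors([some_rev], stoprev=cutoff, inclusive=True):
    #         ...   # never yields revisions below ``cutoff``
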
    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset:
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        if rustancestor is not None and self.index.rust_ext_compat:
            return rustancestor.MissingAncestors(self.index, common)
        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

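    # Editorial sketch: ``findmissing`` is the node-level counterpart of
    # ``findmissingrevs`` and answers the discovery question "which
    # ancestors of ``heads`` does a peer that already has ``common`` lack?":
    #
    #     missing = rl.findmissing(common=[shared_node], heads=[remote_head])
    #     # ``missing`` is topologically sorted, oldest nodes first
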
    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [self.nullid]  # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return (
                [self.node(r) for r in self],
                [self.nullid],
                list(self.heads()),
            )
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == self.nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n)  # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update(
                            [p for p in self.parents(n) if p != self.nullid]
                        )
                    elif n in heads:  # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots?  Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [self.nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the descendants set with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in heads.items() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

    def headrevs(self, revs=None):
        if revs is None:
            try:
                return self.index.headrevs()
            except AttributeError:
                return self._headrevs()
        if rustdagop is not None and self.index.rust_ext_compat:
            return rustdagop.headrevs(self.index, revs)
        return dagop.headrevs(revs, self._uncheckedparentrevs)

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iter over filtered revs so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]

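    # Editorial sketch of the marking pass above, on a linear history
    # 0 <- 1 <- 2: every revision is marked as a potential head when visited,
    # then cleared once it is seen as somebody's parent, leaving only the tip:
    #
    #     after the loop ishead is [0, 0, 1, 0]  (last slot absorbs nullrev)
    #     so the only head is revision 2
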
    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [self.nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = {self.rev(n) for n in stop or []}

        revs = dagop.headrevssubset(
            self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
        )

        return [self.node(rev) for rev in revs]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError):  # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        reachableroots is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return bool(self.reachableroots(a, [b], [a], includepath=False))

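    # Editorial note: the ``a > b`` early return above is sound because a
    # revlog is append-only, so an ancestor always has a smaller revision
    # number than any of its descendants:
    #
    #     if rl.isancestorrev(a, b):
    #         assert a <= b    # ancestors never come after their descendants
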
    def reachableroots(self, minroot, heads, roots, includepath=False):
        """return (heads(::(<roots> and <roots>::<heads>)))

        If includepath is True, return (<roots>::<heads>)."""
        try:
            return self.index.reachableroots2(
                minroot, heads, roots, includepath
            )
        except AttributeError:
            return dagop._reachablerootspure(
                self.parentrevs, minroot, roots, heads, includepath
            )

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return self.nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == self.nodeconstants.nodelen:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node)  # quick search the index
                return node
            except error.LookupError:
                pass  # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if b"%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 2 * self.nodeconstants.nodelen:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (binascii.Error, error.LookupError):
                pass

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be full
        # hashes
        maybewdir = self.nodeconstants.wdirhex.startswith(id)
        ambiguous = False
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    ambiguous = True
                else:
                    return partial
            elif maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            else:
                return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                ambiguous = True
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key is not hex
            pass
        if ambiguous:
            raise error.AmbiguousPrefixLookupError(
                id, self.display_id, _(b'ambiguous identifier')
            )

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            # hex(node)[:...]
            l = len(id) // 2 * 2  # grab an even number of digits
            try:
                # we're dropping the last digit, so let's check that it's hex,
                # to avoid the expensive computation below if it's not
                if len(id) % 2 > 0:
                    if not (id[-1] in hexdigits):
                        return None
                prefix = bin(id[:l])
            except binascii.Error:
                pass
            else:
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [
                    n for n in nl if hex(n).startswith(id) and self.hasnode(n)
                ]
                if self.nodeconstants.nullhex.startswith(id):
                    nl.append(self.nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.display_id, _(b'ambiguous identifier')
                    )
                if maybewdir:
                    raise error.WdirUnsupported
                return None

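    # Editorial sketch: prefix resolution distinguishes three outcomes for a
    # hypothetical hex prefix:
    #
    #     n = rl._partialmatch(b'1f2e3d')
    #     # a node is returned            -> unique match
    #     # None is returned              -> no match at all
    #     # AmbiguousPrefixLookupError    -> several nodes share the prefix
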
    def lookup(self, id):
        """locate a node based on:
        - revision number or str(revision number)
        - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.display_id, _(b'no match found'))

    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""

        def isvalid(prefix):
            try:
                matchednode = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if matchednode is None:
                raise error.LookupError(node, self.display_id, _(b'no node'))
            return True

        def maybewdir(prefix):
            return all(c == b'f' for c in pycompat.iterbytestr(prefix))

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != self.nodeconstants.wdirid:
                    raise error.LookupError(
                        node, self.display_id, _(b'no node')
                    )
            except AttributeError:
                # Fall through to pure code
                pass

        if node == self.nodeconstants.wdirid:
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, len(hexnode) + 1):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

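    # Editorial sketch: ``shortest`` is the inverse concern of
    # ``_partialmatch``; the prefix it returns must resolve back uniquely:
    #
    #     prefix = rl.shortest(node, minlength=4)
    #     assert len(prefix) >= 4
    #     assert rl._partialmatch(prefix) == node
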
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self.delta_config.general_delta:
            return base
        else:
            return rev - 1

    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        ret = self._inner.issnapshot(rev)
        self.issnapshot = self._inner.issnapshot
        return ret

    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
        return len(self._inner._deltachain(rev)[0]) - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._inner._chunk(rev2))

        return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))

    def revision(self, nodeorrev):
        """return an uncompressed revision of a given node or revision
        number.
        """
        return self._revisiondata(nodeorrev)

    def sidedata(self, nodeorrev):
        """a map of extra data related to the changeset but not part of the hash

        This function currently returns a dictionary. However, a more advanced
        mapping object will likely be used in the future for more
        efficient/lazy code.
        """
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
        else:
            rev = self.rev(nodeorrev)
        return self._sidedata(rev)

    def _rawtext(self, node, rev):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)
        """
        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._inner._revisioncache:
            if self._inner._revisioncache[0] == node:
                return (rev, self._inner._revisioncache[2], True)

        if rev is None:
            rev = self.rev(node)

        return self._inner.raw_text(node, rev)

    def _revisiondata(self, nodeorrev, raw=False):
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        # fast path the special `nullid` rev
        if node == self.nullid:
            return b""

        # ``rawtext`` is the text as stored inside the revlog. Might be the
        # revision or might need to be processed to retrieve the revision.
        rev, rawtext, validated = self._rawtext(node, rev)

        if raw and validated:
            # if we don't want to process the raw text and that raw
            # text is cached, we can exit early.
            return rawtext
        if rev is None:
            rev = self.rev(node)
        # the revlog's flags for this revision
        # (they usually alter its state or content)
        flags = self.flags(rev)

        if validated and flags == REVIDX_DEFAULT_FLAGS:
            # no extra flags set, no flag processor runs, text = rawtext
            return rawtext

        if raw:
            validatehash = flagutil.processflagsraw(self, rawtext, flags)
            text = rawtext
        else:
            r = flagutil.processflagsread(self, rawtext, flags)
            text, validatehash = r
        if validatehash:
            self.checkhash(text, node, rev=rev)
        if not validated:
            self._inner._revisioncache = (node, rev, rawtext)

        return text

    def _sidedata(self, rev):
        """Return the sidedata for a given revision number."""
        sidedata_end = None
        if self._docket is not None:
            sidedata_end = self._docket.sidedata_end
        return self._inner.sidedata(rev, sidedata_end)

    def rawdata(self, nodeorrev):
        """return the uncompressed raw data of a given node or revision number."""
        return self._revisiondata(nodeorrev, raw=True)

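    # Editorial sketch, not part of the original module: ``revision`` returns
    # the fully processed text while ``rawdata`` returns the bytes as stored,
    # before flag processors run; for a revision with no extra flags the two
    # are identical. ``rl`` and ``node`` are hypothetical names.
    #
    #   if rl.flags(rl.rev(node)) == REVIDX_DEFAULT_FLAGS:
    #       assert rl.revision(node) == rl.rawdata(node)
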
    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if (
                    self._inner._revisioncache
                    and self._inner._revisioncache[0] == node
                ):
                    self._inner._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(
                    _(b"integrity check failed on %s:%s")
                    % (self.display_id, pycompat.bytestr(revornode))
                )
        except error.RevlogError:
            if self.feature_config.censorable and storageutil.iscensoredtext(
                text
            ):
                raise error.CensoredNodeError(self.display_id, node, text)
            raise

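    # Editorial sketch, not part of the original module: ``hash`` and
    # ``checkhash`` enforce the same invariant -- a node id is the SHA-1 of
    # the revision text salted with its two parent nodes. ``text``, ``p1``
    # and ``p2`` are hypothetical values.
    #
    #   node = rl.hash(text, p1, p2)
    #   rl.checkhash(text, node, p1=p1, p2=p2)  # raises RevlogError on mismatch
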
    @property
    def _split_index_file(self):
        """the path where to expect the index of an ongoing splitting operation

        The file will only exist if a splitting operation is in progress, but
        it is always expected at the same location."""
        parts = self.radix.split(b'/')
        if len(parts) > 1:
            # adds a '-s' suffix to the ``data/`` or ``meta/`` base
            head = parts[0] + b'-s'
            mids = parts[1:-1]
            tail = parts[-1] + b'.i'
            pieces = [head] + mids + [tail]
            return b'/'.join(pieces)
        else:
            # the revlog is stored at the root of the store (changelog or
            # manifest), no risk of collision.
            return self.radix + b'.i.s'

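    # Editorial sketch, not part of the original module: how the property
    # above derives the split-index path from ``radix``.
    #
    #   radix = b'data/foo.txt'  ->  b'data-s/foo.txt.i'
    #   radix = b'00changelog'   ->  b'00changelog.i.s'
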
    def _enforceinlinesize(self, tr, side_write=True):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        total_size = self.start(tiprev) + self.length(tiprev)
        if not self._inline or (self._may_inline and total_size < _maxinline):
            return

        if self._docket is not None:
            msg = b"inline revlog should not have a docket"
            raise error.ProgrammingError(msg)

        # In the common case, we enforce inline size because the revlog has
        # been appended to. And in such a case, it must have an initial offset
        # recorded in the transaction.
        troffset = tr.findoffset(self._inner.canonical_index_file)
        pre_touched = troffset is not None
        if not pre_touched and self.target[0] != KIND_CHANGELOG:
            raise error.RevlogError(
                _(b"%s not found in the transaction") % self._indexfile
            )

        tr.addbackup(self._inner.canonical_index_file, for_offset=pre_touched)
        tr.add(self._datafile, 0)

        new_index_file_path = None
        if side_write:
            old_index_file_path = self._indexfile
            new_index_file_path = self._split_index_file
            opener = self.opener
            weak_self = weakref.ref(self)

            # the "split" index replaces the real index when the transaction
            # is finalized
            def finalize_callback(tr):
                opener.rename(
                    new_index_file_path,
                    old_index_file_path,
                    checkambig=True,
                )
                maybe_self = weak_self()
                if maybe_self is not None:
                    maybe_self._indexfile = old_index_file_path
                    maybe_self._inner.index_file = maybe_self._indexfile

            def abort_callback(tr):
                maybe_self = weak_self()
                if maybe_self is not None:
                    maybe_self._indexfile = old_index_file_path
                    maybe_self._inner.inline = True
                    maybe_self._inner.index_file = old_index_file_path

            tr.registertmp(new_index_file_path)
            if self.target[1] is not None:
                callback_id = b'000-revlog-split-%d-%s' % self.target
            else:
                callback_id = b'000-revlog-split-%d' % self.target[0]
            tr.addfinalize(callback_id, finalize_callback)
            tr.addabort(callback_id, abort_callback)

        self._format_flags &= ~FLAG_INLINE_DATA
        self._inner.split_inline(
            tr,
            self._format_flags | self._format_version,
            new_index_file_path=new_index_file_path,
        )

        self._inline = False
        if new_index_file_path is not None:
            self._indexfile = new_index_file_path

        nodemaputil.setup_persistent_nodemap(tr, self)

    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored."""

    @contextlib.contextmanager
    def reading(self):
        with self._inner.reading():
            yield

    @contextlib.contextmanager
    def _writing(self, transaction):
        if self._trypending:
            msg = b'try to write in a `trypending` revlog: %s'
            msg %= self.display_id
            raise error.ProgrammingError(msg)
        if self._inner.is_writing:
            yield
        else:
            data_end = None
            sidedata_end = None
            if self._docket is not None:
                data_end = self._docket.data_end
                sidedata_end = self._docket.sidedata_end
            with self._inner.writing(
                transaction,
                data_end=data_end,
                sidedata_end=sidedata_end,
            ):
                yield
                if self._docket is not None:
                    self._write_docket(transaction)

    @property
    def is_delaying(self):
        return self._inner.is_delaying

    def _write_docket(self, transaction):
        """write the current docket on disk

        Exists as a method to help the changelog implement transaction logic.

        We could also imagine using the same transaction logic for all revlogs
        since dockets are cheap."""
        self._docket.write(transaction)

    def addrevision(
        self,
        text,
        transaction,
        link,
        p1,
        p2,
        cachedelta=None,
        node=None,
        flags=REVIDX_DEFAULT_FLAGS,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses might
            use a different hashing method (and override checkhash() in that
            case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
            multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(
                _(b"attempted to add linkrev -1 to %s") % self.display_id
            )

        if sidedata is None:
            sidedata = {}
        elif sidedata and not self.feature_config.has_side_data:
            raise error.ProgrammingError(
                _(b"trying to add sidedata to a revlog that does not support them")
            )

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = flagutil.processflagswrite(self, text, flags)

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _(
                    b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
                )
                % (self.display_id, len(rawtext))
            )

        node = node or self.hash(rawtext, p1, p2)
        rev = self.index.get_rev(node)
        if rev is not None:
            return rev

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(
            rawtext,
            transaction,
            link,
            p1,
            p2,
            node,
            flags,
            cachedelta=cachedelta,
            deltacomputer=deltacomputer,
            sidedata=sidedata,
        )

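    # Editorial sketch, not part of the original module: adding a root
    # revision inside an open transaction. ``rl``, ``tr`` and ``linkrev`` are
    # hypothetical; both parents are the null node for a root revision, and
    # the returned value is the new revision number.
    #
    #   rev = rl.addrevision(b'file content\n', tr, linkrev,
    #                        rl.nullid, rl.nullid)
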
    def addrawrevision(
        self,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        node,
        flags,
        cachedelta=None,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (ex: received
        over wire, or read from an external bundle).
        """
        with self._writing(transaction):
            return self._addrevision(
                node,
                rawtext,
                transaction,
                link,
                p1,
                p2,
                flags,
                cachedelta,
                deltacomputer=deltacomputer,
                sidedata=sidedata,
            )

    def compress(self, data):
        return self._inner.compress(data)

    def decompress(self, data):
        return self._inner.decompress(data)

    def _addrevision(
        self,
        node,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        flags,
        cachedelta,
        alwayscache=False,
        deltacomputer=None,
        sidedata=None,
    ):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == self.nullid:
            raise error.RevlogError(
                _(b"%s: attempt to add null revision") % self.display_id
            )
        if (
            node == self.nodeconstants.wdirid
            or node in self.nodeconstants.wdirfilenodeids
        ):
            raise error.RevlogError(
                _(b"%s: attempt to add wdir revision") % self.display_id
            )
        if self._inner._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1

        offset = self._get_data_offset(prev)

        if self._concurrencychecker:
            ifh, dfh, sdfh = self._inner._writinghandles
            # XXX no checking for the sidedata file
            if self._inline:
                # offset is "as if" it were in the .d file, so we need to add
                # on the size of the entry metadata.
                self._concurrencychecker(
                    ifh, self._indexfile, offset + curr * self.index.entry_size
                )
            else:
                # Entries in the .i are a consistent size.
                self._concurrencychecker(
                    ifh, self._indexfile, curr * self.index.entry_size
                )
                self._concurrencychecker(dfh, self._datafile, offset)

        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(
                revlog.size(self, cachedelta[0]), cachedelta[1]
            )
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            write_debug = None
            if self.delta_config.debug_delta:
                write_debug = transaction._report
            deltacomputer = deltautil.deltacomputer(
                self, write_debug=write_debug
            )

        if cachedelta is not None and len(cachedelta) == 2:
            # If the cached delta has no information about how it should be
            # reused, add the default reuse instruction according to the
            # revlog's configuration.
            if (
                self.delta_config.general_delta
                and self.delta_config.lazy_delta_base
            ):
                delta_base_reuse = DELTA_BASE_REUSE_TRY
            else:
                delta_base_reuse = DELTA_BASE_REUSE_NO
            cachedelta = (cachedelta[0], cachedelta[1], delta_base_reuse)

        revinfo = revlogutils.revisioninfo(
            node,
            p1,
            p2,
            btext,
            textlen,
            cachedelta,
            flags,
        )

        deltainfo = deltacomputer.finddeltainfo(revinfo)

        compression_mode = COMP_MODE_INLINE
        if self._docket is not None:
            default_comp = self._docket.default_compression_header
            r = deltautil.delta_compression(default_comp, deltainfo)
            compression_mode, deltainfo = r

        sidedata_compression_mode = COMP_MODE_INLINE
        if sidedata and self.feature_config.has_side_data:
            sidedata_compression_mode = COMP_MODE_PLAIN
            serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
            sidedata_offset = self._docket.sidedata_end
            h, comp_sidedata = self._inner.compress(serialized_sidedata)
            if (
                h != b'u'
                and comp_sidedata[0:1] != b'\0'
                and len(comp_sidedata) < len(serialized_sidedata)
            ):
                assert not h
                if (
                    comp_sidedata[0:1]
                    == self._docket.default_compression_header
                ):
                    sidedata_compression_mode = COMP_MODE_DEFAULT
                    serialized_sidedata = comp_sidedata
                else:
                    sidedata_compression_mode = COMP_MODE_INLINE
                    serialized_sidedata = comp_sidedata
        else:
            serialized_sidedata = b""
            # Don't store the offset if the sidedata is empty, that way
            # we can easily detect empty sidedata and they will be no different
            # than ones we manually add.
            sidedata_offset = 0

        rank = RANK_UNKNOWN
        if self.feature_config.compute_rank:
            if (p1r, p2r) == (nullrev, nullrev):
                rank = 1
            elif p1r != nullrev and p2r == nullrev:
                rank = 1 + self.fast_rank(p1r)
            elif p1r == nullrev and p2r != nullrev:
                rank = 1 + self.fast_rank(p2r)
            else:  # merge node
                if rustdagop is not None and self.index.rust_ext_compat:
                    rank = rustdagop.rank(self.index, p1r, p2r)
                else:
                    pmin, pmax = sorted((p1r, p2r))
                    rank = 1 + self.fast_rank(pmax)
                    rank += sum(1 for _ in self.findmissingrevs([pmax], [pmin]))

        e = revlogutils.entry(
            flags=flags,
            data_offset=offset,
            data_compressed_length=deltainfo.deltalen,
            data_uncompressed_length=textlen,
            data_compression_mode=compression_mode,
            data_delta_base=deltainfo.base,
            link_rev=link,
            parent_rev_1=p1r,
            parent_rev_2=p2r,
            node_id=node,
            sidedata_offset=sidedata_offset,
            sidedata_compressed_length=len(serialized_sidedata),
            sidedata_compression_mode=sidedata_compression_mode,
            rank=rank,
        )

        self.index.append(e)
        entry = self.index.entry_binary(curr)
        if curr == 0 and self._docket is None:
            header = self._format_flags | self._format_version
            header = self.index.pack_header(header)
            entry = header + entry
        self._writeentry(
            transaction,
            entry,
            deltainfo.data,
            link,
            offset,
            serialized_sidedata,
            sidedata_offset,
        )

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo)

        if type(rawtext) == bytes:  # only accept immutable objects
            self._inner._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return curr

    def _get_data_offset(self, prev):
        """Returns the current offset in the (in-transaction) data file.
        Versions < 2 of the revlog can get this in O(1), revlog v2 needs a
        docket file to store that information: since sidedata can be rewritten
        to the end of the data file within a transaction, you can have cases
        where, for example, rev `n` does not have sidedata while rev `n - 1`
        does, leading to `n - 1`'s sidedata being written after `n`'s data.

        TODO cache this in a docket file before getting out of experimental."""
        if self._docket is None:
            return self.end(prev)
        else:
            return self._docket.data_end

    def _writeentry(
        self,
        transaction,
        entry,
        data,
        link,
        offset,
        sidedata,
        sidedata_offset,
    ):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        index_end = data_end = sidedata_end = None
        if self._docket is not None:
            index_end = self._docket.index_end
            data_end = self._docket.data_end
            sidedata_end = self._docket.sidedata_end

        files_end = self._inner.write_entry(
            transaction,
            entry,
            data,
            link,
            offset,
            sidedata,
            sidedata_offset,
            index_end,
            data_end,
            sidedata_end,
        )
        self._enforceinlinesize(transaction)
        if self._docket is not None:
            self._docket.index_end = files_end[0]
            self._docket.data_end = files_end[1]
            self._docket.sidedata_end = files_end[2]

        nodemaputil.setup_persistent_nodemap(transaction, self)

    def addgroup(
        self,
        deltas,
        linkmapper,
        transaction,
        alwayscache=False,
        addrevisioncb=None,
        duplicaterevisioncb=None,
        debug_info=None,
        delta_base_reuse_policy=None,
    ):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the revision that was added.
        """

        if self._adding_group:
            raise error.ProgrammingError(b'cannot nest addgroup() calls')

        # read the default delta-base reuse policy from revlog config if the
        # group did not specify one.
        if delta_base_reuse_policy is None:
            if (
                self.delta_config.general_delta
                and self.delta_config.lazy_delta_base
            ):
                delta_base_reuse_policy = DELTA_BASE_REUSE_TRY
            else:
                delta_base_reuse_policy = DELTA_BASE_REUSE_NO

        self._adding_group = True
        empty = True
        try:
            with self._writing(transaction):
                write_debug = None
                if self.delta_config.debug_delta:
                    write_debug = transaction._report
                deltacomputer = deltautil.deltacomputer(
                    self,
                    write_debug=write_debug,
                    debug_info=debug_info,
                )
                # loop through our set of deltas
                for data in deltas:
                    (
                        node,
                        p1,
                        p2,
                        linknode,
                        deltabase,
                        delta,
                        flags,
                        sidedata,
                    ) = data
                    link = linkmapper(linknode)
                    flags = flags or REVIDX_DEFAULT_FLAGS

                    rev = self.index.get_rev(node)
                    if rev is not None:
                        # this can happen if two branches make the same change
                        self._nodeduplicatecallback(transaction, rev)
                        if duplicaterevisioncb:
                            duplicaterevisioncb(self, rev)
                        empty = False
                        continue

                    for p in (p1, p2):
                        if not self.index.has_node(p):
                            raise error.LookupError(
                                p, self.radix, _(b'unknown parent')
                            )

                    if not self.index.has_node(deltabase):
                        raise error.LookupError(
                            deltabase, self.display_id, _(b'unknown delta base')
                        )

                    baserev = self.rev(deltabase)

                    if baserev != nullrev and self.iscensored(baserev):
                        # if base is censored, delta must be full replacement
                        # in a single patch operation
                        hlen = struct.calcsize(b">lll")
                        oldlen = self.rawsize(baserev)
                        newlen = len(delta) - hlen
                        if delta[:hlen] != mdiff.replacediffheader(
                            oldlen, newlen
                        ):
                            raise error.CensoredBaseError(
                                self.display_id, self.node(baserev)
                            )

                    if not flags and self._peek_iscensored(baserev, delta):
                        flags |= REVIDX_ISCENSORED

                    # We assume consumers of addrevisioncb will want to
                    # retrieve the added revision, which will require a call
                    # to revision(). revision() will fast path if there is a
                    # cache hit. So, we tell _addrevision() to always cache in
                    # this case. We're only using addgroup() in the context of
                    # changegroup generation so the revision data can always
                    # be handled as raw by the flagprocessor.
                    rev = self._addrevision(
                        node,
                        None,
                        transaction,
                        link,
                        p1,
                        p2,
                        flags,
                        (baserev, delta, delta_base_reuse_policy),
                        alwayscache=alwayscache,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

                    if addrevisioncb:
                        addrevisioncb(self, rev)
                    empty = False
        finally:
            self._adding_group = False
        return not empty

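    # Editorial sketch, not part of the original module: the shape of one
    # item of the ``deltas`` iterable that ``addgroup`` consumes, and a
    # minimal call mapping linknodes through a hypothetical changelog ``cl``.
    #
    #   data = (node, p1, p2, linknode, deltabase, delta, flags, sidedata)
    #   rl.addgroup([data], lambda linknode: cl.rev(linknode), tr)
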
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self.feature_config.censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta):
        """Quickly check if a delta produces a censored revision."""
        if not self.feature_config.censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(
            minlink,
            len(self) - 1,
            self.headrevs(),
            self.linkrev,
            self.parentrevs,
        )

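    # Editorial sketch, not part of the original module: ``getstrippoint``
    # and ``strip`` are meant to be used together; ``minlink`` and ``tr`` are
    # hypothetical.
    #
    #   rev, broken = rl.getstrippoint(minlink)
    #   if rev < len(rl):          # something actually needs stripping
    #       rl.strip(minlink, tr)  # truncates rev and everything after it
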
    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        data_end = self.start(rev)
        if not self._inline:
            transaction.add(self._datafile, data_end)
            end = rev * self.index.entry_size
        else:
            end = data_end + (rev * self.index.entry_size)

        if self._sidedatafile:
            sidedata_end = self.sidedata_cut_off(rev)
            transaction.add(self._sidedatafile, sidedata_end)

        transaction.add(self._indexfile, end)
        if self._docket is not None:
            # XXX we could leverage the docket while stripping. However it is
            # not powerful enough at the time of this comment.
            self._docket.index_end = end
            self._docket.data_end = data_end
            self._docket.sidedata_end = sidedata_end
            self._docket.write(transaction, stripping=True)

        # then reset internal state in memory to forget those revisions
        self._chaininfocache = util.lrucachedict(500)
        self._inner.clear_cache()

        del self.index[rev:-1]

    def checksize(self):
        """Check size of index and data files

        return a (dd, di) tuple.
        - dd: extra bytes for the "data" file
        - di: extra bytes for the "index" file

        A healthy revlog will return (0, 0).
        """
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, io.SEEK_END)
                actual = f.tell()
                dd = actual - expected
        except FileNotFoundError:
            dd = 0

        try:
            f = self.opener(self._indexfile)
            f.seek(0, io.SEEK_END)
            actual = f.tell()
            f.close()
            s = self.index.entry_size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except FileNotFoundError:
            di = 0

        return (dd, di)

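    # Editorial sketch, not part of the original module: ``checksize`` as a
    # cheap consistency probe over an existing revlog ``rl``.
    #
    #   dd, di = rl.checksize()
    #   if (dd, di) != (0, 0):
    #       pass  # the on-disk files have trailing garbage; repair needed
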
    def files(self):
        """return list of files that compose this revlog"""
        res = [self._indexfile]
        if self._docket_file is None:
            if not self._inline:
                res.append(self._datafile)
        else:
            res.append(self._docket_file)
            res.extend(self._docket.old_index_filepaths(include_empty=False))
            if self._docket.data_end:
                res.append(self._datafile)
            res.extend(self._docket.old_data_filepaths(include_empty=False))
            if self._docket.sidedata_end:
                res.append(self._sidedatafile)
            res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
        return res

    def emitrevisions(
        self,
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltamode=repository.CG_DELTAMODE_STD,
        sidedata_helpers=None,
        debug_info=None,
    ):
        if nodesorder not in (b'nodes', b'storage', b'linear', None):
            raise error.ProgrammingError(
                b'unhandled value for nodesorder: %s' % nodesorder
            )

        if nodesorder is None and not self.delta_config.general_delta:
            nodesorder = b'storage'

        if (
            not self._storedeltachains
            and deltamode != repository.CG_DELTAMODE_PREV
        ):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self,
            nodes,
            nodesorder,
            revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self._candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
            sidedata_helpers=sidedata_helpers,
            debug_info=debug_info,
        )

    DELTAREUSEALWAYS = b'always'
    DELTAREUSESAMEREVS = b'samerevs'
    DELTAREUSENEVER = b'never'

    DELTAREUSEFULLADD = b'fulladd'

    DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}

    def clone(
        self,
        tr,
        destrevlog,
        addrevisioncb=None,
        deltareuse=DELTAREUSESAMEREVS,
        forcedeltabothparents=None,
        sidedata_helpers=None,
    ):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta
        encoding differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is
          the fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).
        DELTAREUSEFULLADD
          Revisions will be re-added as if they were new content. This is
          slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
          e.g. large file detection and handling.

        Delta computation can be slow, so the choice of delta reuse policy
        can significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if
        the delta could choose a better revision, it will do so. This means
        if you are converting a non-generaldelta revlog to a generaldelta
        revlog, deltas will be recomputed if the delta's parent isn't a
        parent of the revision.

        In addition to the delta policy, the ``forcedeltabothparents``
        argument controls whether to force compute deltas against both
        parents for merges. When ``None``, the destination revlog's current
        setting is used.

        See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
        `sidedata_helpers`.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(
                _(b'value for deltareuse invalid: %s') % deltareuse
            )

        if len(destrevlog):
            raise ValueError(_(b'destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_(b'source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_(b'destination revlog has filtered revisions'))

        # lazydelta and lazydeltabase control whether to reuse a cached
        # delta, if possible.
        old_delta_config = destrevlog.delta_config
        destrevlog.delta_config = destrevlog.delta_config.copy()

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog.delta_config.lazy_delta_base = True
                destrevlog.delta_config.lazy_delta = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog.delta_config.lazy_delta_base = False
                destrevlog.delta_config.lazy_delta = True
            elif deltareuse == self.DELTAREUSENEVER:
                destrevlog.delta_config.lazy_delta_base = False
                destrevlog.delta_config.lazy_delta = False

            delta_both_parents = (
                forcedeltabothparents or old_delta_config.delta_both_parents
            )
            destrevlog.delta_config.delta_both_parents = delta_both_parents

            with self.reading(), destrevlog._writing(tr):
                self._clone(
                    tr,
                    destrevlog,
                    addrevisioncb,
                    deltareuse,
                    forcedeltabothparents,
                    sidedata_helpers,
                )

        finally:
            destrevlog.delta_config = old_delta_config

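    # Illustrative sketch, not part of revlog.py: how a caller might choose
    # a delta reuse policy for `clone`. `src_revlog`, `dest_revlog` and `tr`
    # are hypothetical stand-ins for a source revlog, an empty destination
    # revlog, and an open transaction.
    #
    #   src_revlog.clone(
    #       tr,
    #       dest_revlog,
    #       deltareuse=src_revlog.DELTAREUSENEVER,  # recompute every delta
    #   )
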
    def _clone(
        self,
        tr,
        destrevlog,
        addrevisioncb,
        deltareuse,
        forcedeltabothparents,
        sidedata_helpers,
    ):
        """perform the core duty of `revlog.clone` after parameter processing"""
        write_debug = None
        if self.delta_config.debug_delta:
            write_debug = tr._report
        deltacomputer = deltautil.deltacomputer(
            destrevlog,
            write_debug=write_debug,
        )
        index = self.index
        for rev in self:
            entry = index[rev]

            # Some classes override linkrev to take filtered revs into
            # account. Use raw entry from index.
            flags = entry[0] & 0xFFFF
            linkrev = entry[4]
            p1 = index[entry[5]][7]
            p2 = index[entry[6]][7]
            node = entry[7]

            # (Possibly) reuse the delta from the revlog if allowed and
            # the revlog chunk is a delta.
            cachedelta = None
            rawtext = None
            if deltareuse == self.DELTAREUSEFULLADD:
                text = self._revisiondata(rev)
                sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                destrevlog.addrevision(
                    text,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    cachedelta=cachedelta,
                    node=node,
                    flags=flags,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )
            else:
                if destrevlog.delta_config.lazy_delta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._inner._chunk(rev)))

                sidedata = None
                if not cachedelta:
                    try:
                        rawtext = self._revisiondata(rev)
                    except error.CensoredNodeError as censored:
                        assert flags & REVIDX_ISCENSORED
                        rawtext = censored.tombstone
                    sidedata = self.sidedata(rev)
                if sidedata is None:
                    sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                destrevlog._addrevision(
                    node,
                    rawtext,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    flags,
                    cachedelta,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )

            if addrevisioncb:
                addrevisioncb(self, rev, node)

    def censorrevision(self, tr, censornode, tombstone=b''):
        if self._format_version == REVLOGV0:
            raise error.RevlogError(
                _(b'cannot censor with version %d revlogs')
                % self._format_version
            )
        elif self._format_version == REVLOGV1:
            rewrite.v1_censor(self, tr, censornode, tombstone)
        else:
            rewrite.v2_censor(self, tr, censornode, tombstone)

    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_(b'index contains %d extra bytes') % di)

        version = self._format_version

        # The verifier tells us what version revlog we should be.
        if version != state[b'expectedversion']:
            yield revlogproblem(
                warning=_(b"warning: '%s' uses revlog format %d; expected %d")
                % (self.display_id, version, state[b'expectedversion'])
            )

        state[b'skipread'] = set()
        state[b'safe_renamed'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta: file content starts with b'\1\n', the metadata
            #         header defined in filelog.py, but without a rename
            #   ext: content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  -------------------------------------------------------
            #  flags()              | 0      | 0      | 0     | not 0
            #  renamed()            | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n' | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "rawdata(rev)". "text"
            # mentioned below is "revision(rev)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see flagutil.addflagprocessor).
            #
            #              | common | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1     | L1     | L1    | L1
            # size()       | L1     | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2     | L2     | L2    | L2
            # len(text)    | L2     | L2     | L2    | L3
            # len(read())  | L2     | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            #  1. length check: L1 == L2, in all cases.
            #  2. hash check: depending on flag processor, we may need to
            #     use either "text" (external), or "rawtext" (in revlog).

            try:
                skipflags = state.get(b'skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                _verify_revision(self, skipflags, state, node)

                l1 = self.rawsize(rev)
                l2 = len(self.rawdata(node))

                if l1 != l2:
                    yield revlogproblem(
                        error=_(b'unpacked size is %d, %d expected') % (l2, l1),
                        node=node,
                    )

            except error.CensoredNodeError:
                if state[b'erroroncensored']:
                    yield revlogproblem(
                        error=_(b'censored file data'), node=node
                    )
                state[b'skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_(b'unpacking %s: %s')
                    % (short(node), stringutil.forcebytestr(e)),
                    node=node,
                )
                state[b'skipread'].add(node)

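    # Illustrative sketch, not part of revlog.py: the core length invariant
    # verified above. For a hypothetical healthy revlog `rl`, L1 (rawsize
    # recorded in the index) must equal L2 (length of the stored rawtext):
    #
    #   for rev in rl:
    #       assert rl.rawsize(rev) == len(rl.rawdata(rl.node(rev)))
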
    def storageinfo(
        self,
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
        d = {}

        if exclusivefiles:
            d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
            if not self._inline:
                d[b'exclusivefiles'].append((self.opener, self._datafile))

        if sharedfiles:
            d[b'sharedfiles'] = []

        if revisionscount:
            d[b'revisionscount'] = len(self)

        if trackedsize:
            d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d[b'storedsize'] = sum(
                self.opener.stat(path).st_size for path in self.files()
            )

        return d

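    # Illustrative sketch, not part of revlog.py: callers request only the
    # figures they need from a hypothetical revlog `rl`; keys that were not
    # requested are simply absent from the result.
    #
    #   info = rl.storageinfo(trackedsize=True, storedsize=True)
    #   # info == {b'trackedsize': ..., b'storedsize': ...}
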
    def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
        if not self.feature_config.has_side_data:
            return
        # revlog formats with sidedata support do not support inline
        assert not self._inline
        if not helpers[1] and not helpers[2]:
            # Nothing to generate or remove
            return

        new_entries = []
        # append the new sidedata
        with self._writing(transaction):
            ifh, dfh, sdfh = self._inner._writinghandles
            dfh.seek(self._docket.sidedata_end, os.SEEK_SET)

            current_offset = sdfh.tell()
            for rev in range(startrev, endrev + 1):
                entry = self.index[rev]
                new_sidedata, flags = sidedatautil.run_sidedata_helpers(
                    store=self,
                    sidedata_helpers=helpers,
                    sidedata={},
                    rev=rev,
                )

                serialized_sidedata = sidedatautil.serialize_sidedata(
                    new_sidedata
                )

                sidedata_compression_mode = COMP_MODE_INLINE
                if serialized_sidedata and self.feature_config.has_side_data:
                    sidedata_compression_mode = COMP_MODE_PLAIN
                    h, comp_sidedata = self._inner.compress(serialized_sidedata)
                    if (
                        h != b'u'
                        and comp_sidedata[0] != b'\0'
                        and len(comp_sidedata) < len(serialized_sidedata)
                    ):
                        assert not h
                        if (
                            comp_sidedata[0]
                            == self._docket.default_compression_header
                        ):
                            sidedata_compression_mode = COMP_MODE_DEFAULT
                            serialized_sidedata = comp_sidedata
                        else:
                            sidedata_compression_mode = COMP_MODE_INLINE
                            serialized_sidedata = comp_sidedata
                if entry[8] != 0 or entry[9] != 0:
                    # rewriting entries that already have sidedata is not
                    # supported yet, because it introduces garbage data in the
                    # revlog.
                    msg = b"rewriting existing sidedata is not supported yet"
                    raise error.Abort(msg)

                # Apply (potential) flags to add and to remove after running
                # the sidedata helpers
                new_offset_flags = entry[0] | flags[0] & ~flags[1]
                entry_update = (
                    current_offset,
                    len(serialized_sidedata),
                    new_offset_flags,
                    sidedata_compression_mode,
                )

                # the sidedata computation might have moved the file cursors
                # around
                sdfh.seek(current_offset, os.SEEK_SET)
                sdfh.write(serialized_sidedata)
                new_entries.append(entry_update)
                current_offset += len(serialized_sidedata)
            self._docket.sidedata_end = sdfh.tell()

            # rewrite the new index entries
            ifh.seek(startrev * self.index.entry_size)
            for i, e in enumerate(new_entries):
                rev = startrev + i
                self.index.replace_sidedata_info(rev, *e)
                packed = self.index.entry_binary(rev)
                if rev == 0 and self._docket is None:
                    header = self._format_flags | self._format_version
                    header = self.index.pack_header(header)
                    packed = header + packed
                ifh.write(packed)
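
    # Illustrative note, not part of revlog.py: one reading of the three
    # compression modes selected above (constants from revlogutils.constants,
    # semantics hedged from context):
    #
    #   COMP_MODE_PLAIN   -> sidedata chunk stored uncompressed
    #   COMP_MODE_DEFAULT -> compressed with the docket's default engine,
    #                        so no per-chunk compression header is needed
    #   COMP_MODE_INLINE  -> the chunk carries its own compression header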

rust/hg-core/src/operations/debugdata.rs
@@ -1,38 +1,38 @@
// debugdata.rs
//
// Copyright 2020 Antoine Cezar <antoine.cezar@octobus.net>
//
// This software may be used and distributed according to the terms of the
// GNU General Public License version 2 or any later version.

use crate::repo::Repo;
-use crate::requirements;
use crate::revlog::{Revlog, RevlogError};

/// Kind of data to debug
-#[derive(Debug, Copy, Clone)]
+#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum DebugDataKind {
    Changelog,
    Manifest,
}

/// Dump the contents data of a revision.
pub fn debug_data(
    repo: &Repo,
    revset: &str,
    kind: DebugDataKind,
) -> Result<Vec<u8>, RevlogError> {
    let index_file = match kind {
        DebugDataKind::Changelog => "00changelog.i",
        DebugDataKind::Manifest => "00manifest.i",
    };
-    let use_nodemap = repo
-        .requirements()
-        .contains(requirements::NODEMAP_REQUIREMENT);
-    let revlog =
-        Revlog::open(&repo.store_vfs(), index_file, None, use_nodemap)?;
+    let revlog = Revlog::open(
+        &repo.store_vfs(),
+        index_file,
+        None,
+        repo.default_revlog_options(kind == DebugDataKind::Changelog)?,
+    )?;
    let rev =
        crate::revset::resolve_rev_number_or_hex_prefix(revset, &revlog)?;
    let data = revlog.get_rev_data_for_checked_rev(rev)?;
    Ok(data.into_owned())
}
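
// Illustrative sketch, not part of this patch: a hypothetical caller of
// `debug_data` after the change. The revlog options now come from the repo
// itself via `default_revlog_options` rather than a hand-rolled nodemap
// requirement check; `repo` is assumed to be an already-opened `Repo`.
//
//     let data = debug_data(&repo, "tip", DebugDataKind::Changelog)?;
//     std::io::stdout().write_all(&data)?;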

rust/hg-core/src/repo.rs
@@ -1,782 +1,820 @@
use crate::changelog::Changelog;
use crate::config::{Config, ConfigError, ConfigParseError};
use crate::dirstate::DirstateParents;
use crate::dirstate_tree::dirstate_map::DirstateMapWriteMode;
use crate::dirstate_tree::on_disk::Docket as DirstateDocket;
use crate::dirstate_tree::owning::OwningDirstateMap;
use crate::errors::HgResultExt;
use crate::errors::{HgError, IoResultExt};
use crate::lock::{try_with_lock_no_wait, LockError};
use crate::manifest::{Manifest, Manifestlog};
+use crate::requirements::{
+    CHANGELOGV2_REQUIREMENT, GENERALDELTA_REQUIREMENT, NODEMAP_REQUIREMENT,
+    REVLOGV1_REQUIREMENT, REVLOGV2_REQUIREMENT,
+};
use crate::revlog::filelog::Filelog;
use crate::revlog::RevlogError;
use crate::utils::debug::debug_wait_for_file_or_print;
use crate::utils::files::get_path_from_bytes;
use crate::utils::hg_path::HgPath;
use crate::utils::SliceExt;
use crate::vfs::{is_dir, is_file, Vfs};
-use crate::DirstateError;
-use crate::{requirements, NodePrefix, UncheckedRevision};
+use crate::{
+    requirements, NodePrefix, RevlogVersionOptions, UncheckedRevision,
+};
+use crate::{DirstateError, RevlogOpenOptions};
use std::cell::{Ref, RefCell, RefMut};
use std::collections::HashSet;
use std::io::Seek;
use std::io::SeekFrom;
use std::io::Write as IoWrite;
use std::path::{Path, PathBuf};

const V2_MAX_READ_ATTEMPTS: usize = 5;

type DirstateMapIdentity = (Option<u64>, Option<Vec<u8>>, usize);

/// A repository on disk
pub struct Repo {
    working_directory: PathBuf,
    dot_hg: PathBuf,
    store: PathBuf,
    requirements: HashSet<String>,
    config: Config,
    dirstate_parents: LazyCell<DirstateParents>,
    dirstate_map: LazyCell<OwningDirstateMap>,
    changelog: LazyCell<Changelog>,
    manifestlog: LazyCell<Manifestlog>,
}

#[derive(Debug, derive_more::From)]
pub enum RepoError {
    NotFound {
        at: PathBuf,
    },
    #[from]
    ConfigParseError(ConfigParseError),
    #[from]
    Other(HgError),
}

impl From<ConfigError> for RepoError {
    fn from(error: ConfigError) -> Self {
        match error {
            ConfigError::Parse(error) => error.into(),
            ConfigError::Other(error) => error.into(),
        }
    }
}

impl Repo {
    /// tries to find nearest repository root in current working directory or
    /// its ancestors
    pub fn find_repo_root() -> Result<PathBuf, RepoError> {
        let current_directory = crate::utils::current_dir()?;
        // ancestors() is inclusive: it first yields `current_directory`
        // as-is.
        for ancestor in current_directory.ancestors() {
            if is_dir(ancestor.join(".hg"))? {
                return Ok(ancestor.to_path_buf());
            }
        }
        Err(RepoError::NotFound {
            at: current_directory,
        })
    }

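    // Illustrative sketch, not part of this file: discovery from a working
    // directory nested somewhere inside a checkout (path is hypothetical).
    //
    //     let root = Repo::find_repo_root()?;
    //     // e.g. Ok("/home/user/myrepo") when run from myrepo/src/
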
    /// Find a repository, either at the given path (which must contain a
    /// `.hg` sub-directory) or by searching the current directory and its
    /// ancestors.
    ///
    /// A method with two very different "modes" like this is usually a code
    /// smell that calls for two methods instead, but in this case an
    /// `Option` is what rhg sub-commands get from Clap for the `-R` /
    /// `--repository` CLI argument. Having two methods would just move that
    /// `if` to almost all callers.
    pub fn find(
        config: &Config,
        explicit_path: Option<PathBuf>,
    ) -> Result<Self, RepoError> {
        if let Some(root) = explicit_path {
            if is_dir(root.join(".hg"))? {
                Self::new_at_path(root, config)
            } else if is_file(&root)? {
                Err(HgError::unsupported("bundle repository").into())
            } else {
                Err(RepoError::NotFound { at: root })
            }
        } else {
            let root = Self::find_repo_root()?;
            Self::new_at_path(root, config)
        }
    }

    /// To be called after checking that `.hg` is a sub-directory
    fn new_at_path(
        working_directory: PathBuf,
        config: &Config,
    ) -> Result<Self, RepoError> {
        let dot_hg = working_directory.join(".hg");

        let mut repo_config_files =
            vec![dot_hg.join("hgrc"), dot_hg.join("hgrc-not-shared")];

        let hg_vfs = Vfs { base: &dot_hg };
        let mut reqs = requirements::load_if_exists(hg_vfs)?;
        let relative =
            reqs.contains(requirements::RELATIVE_SHARED_REQUIREMENT);
        let shared =
            reqs.contains(requirements::SHARED_REQUIREMENT) || relative;

        // From `mercurial/localrepo.py`:
        //
        // if .hg/requires contains the sharesafe requirement, it means
        // there exists a `.hg/store/requires` too and we should read it
        // NOTE: presence of SHARESAFE_REQUIREMENT imply that store requirement
        // is present. We never write SHARESAFE_REQUIREMENT for a repo if store
        // is not present, refer checkrequirementscompat() for that
        //
        // However, if SHARESAFE_REQUIREMENT is not present, it means that the
        // repository was shared the old way. We check the share source
        // .hg/requires for SHARESAFE_REQUIREMENT to detect whether the
        // current repository needs to be reshared
        let share_safe = reqs.contains(requirements::SHARESAFE_REQUIREMENT);

        let store_path;
        if !shared {
            store_path = dot_hg.join("store");
        } else {
            let bytes = hg_vfs.read("sharedpath")?;
            let mut shared_path =
                get_path_from_bytes(bytes.trim_end_matches(|b| b == b'\n'))
                    .to_owned();
            if relative {
                shared_path = dot_hg.join(shared_path)
            }
            if !is_dir(&shared_path)? {
                return Err(HgError::corrupted(format!(
                    ".hg/sharedpath points to nonexistent directory {}",
                    shared_path.display()
                ))
                .into());
            }

            store_path = shared_path.join("store");

            let source_is_share_safe =
                requirements::load(Vfs { base: &shared_path })?
                    .contains(requirements::SHARESAFE_REQUIREMENT);

            if share_safe != source_is_share_safe {
                return Err(HgError::unsupported("share-safe mismatch").into());
            }

            if share_safe {
                repo_config_files.insert(0, shared_path.join("hgrc"))
            }
        }
        if share_safe {
            reqs.extend(requirements::load(Vfs { base: &store_path })?);
        }

        let repo_config = if std::env::var_os("HGRCSKIPREPO").is_none() {
            config.combine_with_repo(&repo_config_files)?
        } else {
            config.clone()
        };

        let repo = Self {
            requirements: reqs,
            working_directory,
            store: store_path,
            dot_hg,
            config: repo_config,
            dirstate_parents: LazyCell::new(),
            dirstate_map: LazyCell::new(),
            changelog: LazyCell::new(),
            manifestlog: LazyCell::new(),
        };

        requirements::check(&repo)?;

        Ok(repo)
    }

    pub fn working_directory_path(&self) -> &Path {
        &self.working_directory
    }

    pub fn requirements(&self) -> &HashSet<String> {
        &self.requirements
    }

    pub fn config(&self) -> &Config {
        &self.config
    }

    /// For accessing repository files (in `.hg`), except for the store
    /// (`.hg/store`).
    pub fn hg_vfs(&self) -> Vfs<'_> {
        Vfs { base: &self.dot_hg }
    }

    /// For accessing repository store files (in `.hg/store`)
    pub fn store_vfs(&self) -> Vfs<'_> {
        Vfs { base: &self.store }
    }

    /// For accessing the working copy
    pub fn working_directory_vfs(&self) -> Vfs<'_> {
        Vfs {
            base: &self.working_directory,
        }
    }

    pub fn try_with_wlock_no_wait<R>(
        &self,
        f: impl FnOnce() -> R,
    ) -> Result<R, LockError> {
        try_with_lock_no_wait(self.hg_vfs(), "wlock", f)
    }

    /// Whether this repo should use dirstate-v2.
    /// The presence of `dirstate-v2` in the requirements does not mean that
    /// the on-disk dirstate is necessarily in version 2. In most cases,
    /// a dirstate-v2 file will indeed be found, but in rare cases (like the
    /// upgrade mechanism being cut short), the on-disk version will be a
    /// v1 file.
    /// Semantically, having a requirement only means that a client cannot
    /// properly understand or properly update the repo if it lacks the support
    /// for the required feature, but not that that feature is actually used
    /// in all occasions.
    pub fn use_dirstate_v2(&self) -> bool {
        self.requirements
            .contains(requirements::DIRSTATE_V2_REQUIREMENT)
    }

    pub fn has_sparse(&self) -> bool {
        self.requirements.contains(requirements::SPARSE_REQUIREMENT)
    }

    pub fn has_narrow(&self) -> bool {
        self.requirements.contains(requirements::NARROW_REQUIREMENT)
    }

    pub fn has_nodemap(&self) -> bool {
        self.requirements
            .contains(requirements::NODEMAP_REQUIREMENT)
    }

    fn dirstate_file_contents(&self) -> Result<Vec<u8>, HgError> {
        Ok(self
            .hg_vfs()
            .read("dirstate")
            .io_not_found_as_none()?
            .unwrap_or_default())
    }

    fn dirstate_identity(&self) -> Result<Option<u64>, HgError> {
        use std::os::unix::fs::MetadataExt;
        Ok(self
            .hg_vfs()
            .symlink_metadata("dirstate")
            .io_not_found_as_none()?
            .map(|meta| meta.ino()))
    }

    pub fn dirstate_parents(&self) -> Result<DirstateParents, HgError> {
        Ok(*self
            .dirstate_parents
            .get_or_init(|| self.read_dirstate_parents())?)
    }

    fn read_dirstate_parents(&self) -> Result<DirstateParents, HgError> {
        let dirstate = self.dirstate_file_contents()?;
        let parents = if dirstate.is_empty() {
            DirstateParents::NULL
        } else if self.use_dirstate_v2() {
            let docket_res =
                crate::dirstate_tree::on_disk::read_docket(&dirstate);
            match docket_res {
                Ok(docket) => docket.parents(),
                Err(_) => {
                    log::info!(
                        "Parsing dirstate docket failed, \
                        falling back to dirstate-v1"
                    );
                    *crate::dirstate::parsers::parse_dirstate_parents(
                        &dirstate,
                    )?
                }
            }
        } else {
            *crate::dirstate::parsers::parse_dirstate_parents(&dirstate)?
        };
        self.dirstate_parents.set(parents);
        Ok(parents)
    }

    /// Returns the information read from the dirstate docket necessary to
    /// check if the data file has been updated/deleted by another process
    /// since we last read the dirstate.
    /// Namely, the inode, data file uuid and the data size.
    fn get_dirstate_data_file_integrity(
        &self,
    ) -> Result<DirstateMapIdentity, HgError> {
        assert!(
            self.use_dirstate_v2(),
            "accessing dirstate data file ID without dirstate-v2"
        );
        // Get the identity before the contents since we could have a race
        // between the two. Having an identity that is too old is fine, but
        // one that is younger than the content change is bad.
        let identity = self.dirstate_identity()?;
        let dirstate = self.dirstate_file_contents()?;
        if dirstate.is_empty() {
            self.dirstate_parents.set(DirstateParents::NULL);
            Ok((identity, None, 0))
        } else {
            let docket_res =
                crate::dirstate_tree::on_disk::read_docket(&dirstate);
            match docket_res {
                Ok(docket) => {
                    self.dirstate_parents.set(docket.parents());
                    Ok((
                        identity,
                        Some(docket.uuid.to_owned()),
                        docket.data_size(),
                    ))
                }
                Err(_) => {
                    log::info!(
                        "Parsing dirstate docket failed, \
                        falling back to dirstate-v1"
                    );
                    let parents =
                        *crate::dirstate::parsers::parse_dirstate_parents(
                            &dirstate,
                        )?;
                    self.dirstate_parents.set(parents);
                    Ok((identity, None, 0))
                }
            }
        }
    }

    fn new_dirstate_map(&self) -> Result<OwningDirstateMap, DirstateError> {
        if self.use_dirstate_v2() {
            // The v2 dirstate is split into a docket and a data file.
            // Since we don't always take the `wlock` to read it
            // (like in `hg status`), it is susceptible to races.
            // A simple retry method should be enough since full rewrites
            // only happen when too much garbage data is present and
            // this race is unlikely.
            let mut tries = 0;

            while tries < V2_MAX_READ_ATTEMPTS {
                tries += 1;
                match self.read_docket_and_data_file() {
                    Ok(m) => {
                        return Ok(m);
                    }
                    Err(e) => match e {
                        DirstateError::Common(HgError::RaceDetected(
                            context,
                        )) => {
                            log::info!(
                                "dirstate read race detected {} (retry {}/{})",
                                context,
                                tries,
                                V2_MAX_READ_ATTEMPTS,
                            );
                            continue;
                        }
                        _ => {
                            log::info!(
                                "Reading dirstate v2 failed, \
                                falling back to v1"
                            );
                            return self.new_dirstate_map_v1();
                        }
                    },
                }
            }
            let error = HgError::abort(
                format!("dirstate read race happened {tries} times in a row"),
                255,
                None,
            );
            Err(DirstateError::Common(error))
        } else {
            self.new_dirstate_map_v1()
        }
    }

    fn new_dirstate_map_v1(&self) -> Result<OwningDirstateMap, DirstateError> {
        debug_wait_for_file_or_print(self.config(), "dirstate.pre-read-file");
        let identity = self.dirstate_identity()?;
        let dirstate_file_contents = self.dirstate_file_contents()?;
        if dirstate_file_contents.is_empty() {
            self.dirstate_parents.set(DirstateParents::NULL);
            Ok(OwningDirstateMap::new_empty(Vec::new()))
        } else {
            let (map, parents) =
                OwningDirstateMap::new_v1(dirstate_file_contents, identity)?;
            self.dirstate_parents.set(parents);
            Ok(map)
        }
    }

    fn read_docket_and_data_file(
        &self,
    ) -> Result<OwningDirstateMap, DirstateError> {
        debug_wait_for_file_or_print(self.config(), "dirstate.pre-read-file");
        let dirstate_file_contents = self.dirstate_file_contents()?;
        let identity = self.dirstate_identity()?;
        if dirstate_file_contents.is_empty() {
            self.dirstate_parents.set(DirstateParents::NULL);
            return Ok(OwningDirstateMap::new_empty(Vec::new()));
        }
        let docket = crate::dirstate_tree::on_disk::read_docket(
            &dirstate_file_contents,
        )?;
        debug_wait_for_file_or_print(
            self.config(),
            "dirstate.post-docket-read-file",
        );
        self.dirstate_parents.set(docket.parents());
        let uuid = docket.uuid.to_owned();
        let data_size = docket.data_size();

        let context = "between reading dirstate docket and data file";
        let race_error = HgError::RaceDetected(context.into());
        let metadata = docket.tree_metadata();

        let mut map = if crate::vfs::is_on_nfs_mount(docket.data_filename()) {
            // Don't mmap on NFS to prevent `SIGBUS` error on deletion
            let contents = self.hg_vfs().read(docket.data_filename());
            let contents = match contents {
                Ok(c) => c,
                Err(HgError::IoError { error, context }) => {
                    match error.raw_os_error().expect("real os error") {
                        // 2 = ENOENT, No such file or directory
                        // 116 = ESTALE, Stale NFS file handle
                        //
                        // TODO match on `error.kind()` when
                        // `ErrorKind::StaleNetworkFileHandle` is stable.
                        2 | 116 => {
                            // Race where the data file was deleted right after
                            // we read the docket, try again
                            return Err(race_error.into());
                        }
                        _ => {
                            return Err(
                                HgError::IoError { error, context }.into()
                            )
                        }
                    }
                }
                Err(e) => return Err(e.into()),
            };
            OwningDirstateMap::new_v2(
                contents, data_size, metadata, uuid, identity,
            )
        } else {
            match self
                .hg_vfs()
                .mmap_open(docket.data_filename())
                .io_not_found_as_none()
            {
                Ok(Some(data_mmap)) => OwningDirstateMap::new_v2(
                    data_mmap, data_size, metadata, uuid, identity,
                ),
                Ok(None) => {
                    // Race where the data file was deleted right after we
                    // read the docket, try again
                    return Err(race_error.into());
                }
                Err(e) => return Err(e.into()),
            }
        }?;

        let write_mode_config = self
            .config()
            .get_str(b"devel", b"dirstate.v2.data_update_mode")
            .unwrap_or(Some("auto"))
            .unwrap_or("auto"); // don't bother for devel options
        let write_mode = match write_mode_config {
            "auto" => DirstateMapWriteMode::Auto,
            "force-new" => DirstateMapWriteMode::ForceNewDataFile,
508 "force-new" => DirstateMapWriteMode::ForceNewDataFile,
503 "force-append" => DirstateMapWriteMode::ForceAppend,
509 "force-append" => DirstateMapWriteMode::ForceAppend,
504 _ => DirstateMapWriteMode::Auto,
510 _ => DirstateMapWriteMode::Auto,
505 };
511 };
506
512
507 map.with_dmap_mut(|m| m.set_write_mode(write_mode));
513 map.with_dmap_mut(|m| m.set_write_mode(write_mode));
508
514
509 Ok(map)
515 Ok(map)
510 }
516 }
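
The tail of this function decodes the `devel.dirstate.v2.data_update_mode` knob; a self-contained restatement of that fallback behaviour, where the enum mirrors `DirstateMapWriteMode` and unrecognized values deliberately degrade to `Auto` rather than erroring (it is a devel-only option):

    #[derive(Debug, PartialEq)]
    enum WriteMode {
        Auto,
        ForceNewDataFile,
        ForceAppend,
    }

    fn parse_write_mode(raw: &str) -> WriteMode {
        match raw {
            "auto" => WriteMode::Auto,
            "force-new" => WriteMode::ForceNewDataFile,
            "force-append" => WriteMode::ForceAppend,
            // Devel-only option: anything unrecognized falls back to Auto.
            _ => WriteMode::Auto,
        }
    }

    fn main() {
        assert_eq!(parse_write_mode("force-new"), WriteMode::ForceNewDataFile);
        assert_eq!(parse_write_mode("bogus"), WriteMode::Auto);
    }
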
511
517
512 pub fn dirstate_map(
518 pub fn dirstate_map(
513 &self,
519 &self,
514 ) -> Result<Ref<OwningDirstateMap>, DirstateError> {
520 ) -> Result<Ref<OwningDirstateMap>, DirstateError> {
515 self.dirstate_map.get_or_init(|| self.new_dirstate_map())
521 self.dirstate_map.get_or_init(|| self.new_dirstate_map())
516 }
522 }
517
523
518 pub fn dirstate_map_mut(
524 pub fn dirstate_map_mut(
519 &self,
525 &self,
520 ) -> Result<RefMut<OwningDirstateMap>, DirstateError> {
526 ) -> Result<RefMut<OwningDirstateMap>, DirstateError> {
521 self.dirstate_map
527 self.dirstate_map
522 .get_mut_or_init(|| self.new_dirstate_map())
528 .get_mut_or_init(|| self.new_dirstate_map())
523 }
529 }
524
530
525 fn new_changelog(&self) -> Result<Changelog, HgError> {
531 fn new_changelog(&self) -> Result<Changelog, HgError> {
526 Changelog::open(&self.store_vfs(), self.has_nodemap())
532 Changelog::open(&self.store_vfs(), self.default_revlog_options(true)?)
527 }
533 }
528
534
529 pub fn changelog(&self) -> Result<Ref<Changelog>, HgError> {
535 pub fn changelog(&self) -> Result<Ref<Changelog>, HgError> {
530 self.changelog.get_or_init(|| self.new_changelog())
536 self.changelog.get_or_init(|| self.new_changelog())
531 }
537 }
532
538
533 pub fn changelog_mut(&self) -> Result<RefMut<Changelog>, HgError> {
539 pub fn changelog_mut(&self) -> Result<RefMut<Changelog>, HgError> {
534 self.changelog.get_mut_or_init(|| self.new_changelog())
540 self.changelog.get_mut_or_init(|| self.new_changelog())
535 }
541 }
536
542
537 fn new_manifestlog(&self) -> Result<Manifestlog, HgError> {
543 fn new_manifestlog(&self) -> Result<Manifestlog, HgError> {
538 Manifestlog::open(&self.store_vfs(), self.has_nodemap())
544 Manifestlog::open(
545 &self.store_vfs(),
546 self.default_revlog_options(false)?,
547 )
539 }
548 }
540
549
541 pub fn manifestlog(&self) -> Result<Ref<Manifestlog>, HgError> {
550 pub fn manifestlog(&self) -> Result<Ref<Manifestlog>, HgError> {
542 self.manifestlog.get_or_init(|| self.new_manifestlog())
551 self.manifestlog.get_or_init(|| self.new_manifestlog())
543 }
552 }
544
553
545 pub fn manifestlog_mut(&self) -> Result<RefMut<Manifestlog>, HgError> {
554 pub fn manifestlog_mut(&self) -> Result<RefMut<Manifestlog>, HgError> {
546 self.manifestlog.get_mut_or_init(|| self.new_manifestlog())
555 self.manifestlog.get_mut_or_init(|| self.new_manifestlog())
547 }
556 }
548
557
549 /// Returns the manifest of the *changeset* with the given node ID
558 /// Returns the manifest of the *changeset* with the given node ID
550 pub fn manifest_for_node(
559 pub fn manifest_for_node(
551 &self,
560 &self,
552 node: impl Into<NodePrefix>,
561 node: impl Into<NodePrefix>,
553 ) -> Result<Manifest, RevlogError> {
562 ) -> Result<Manifest, RevlogError> {
554 self.manifestlog()?.data_for_node(
563 self.manifestlog()?.data_for_node(
555 self.changelog()?
564 self.changelog()?
556 .data_for_node(node.into())?
565 .data_for_node(node.into())?
557 .manifest_node()?
566 .manifest_node()?
558 .into(),
567 .into(),
559 )
568 )
560 }
569 }
561
570
562 /// Returns the manifest of the *changeset* with the given revision number
571 /// Returns the manifest of the *changeset* with the given revision number
563 pub fn manifest_for_rev(
572 pub fn manifest_for_rev(
564 &self,
573 &self,
565 revision: UncheckedRevision,
574 revision: UncheckedRevision,
566 ) -> Result<Manifest, RevlogError> {
575 ) -> Result<Manifest, RevlogError> {
567 self.manifestlog()?.data_for_node(
576 self.manifestlog()?.data_for_node(
568 self.changelog()?
577 self.changelog()?
569 .data_for_rev(revision)?
578 .data_for_rev(revision)?
570 .manifest_node()?
579 .manifest_node()?
571 .into(),
580 .into(),
572 )
581 )
573 }
582 }
574
583
575 pub fn has_subrepos(&self) -> Result<bool, DirstateError> {
584 pub fn has_subrepos(&self) -> Result<bool, DirstateError> {
576 if let Some(entry) = self.dirstate_map()?.get(HgPath::new(".hgsub"))? {
585 if let Some(entry) = self.dirstate_map()?.get(HgPath::new(".hgsub"))? {
577 Ok(entry.tracked())
586 Ok(entry.tracked())
578 } else {
587 } else {
579 Ok(false)
588 Ok(false)
580 }
589 }
581 }
590 }
582
591
583 pub fn filelog(&self, path: &HgPath) -> Result<Filelog, HgError> {
592 pub fn filelog(&self, path: &HgPath) -> Result<Filelog, HgError> {
584 Filelog::open(self, path)
593 Filelog::open(self, path, self.default_revlog_options(false)?)
585 }
594 }
586
595
587 /// Write to disk any updates that were made through `dirstate_map_mut`.
596 /// Write to disk any updates that were made through `dirstate_map_mut`.
588 ///
597 ///
589 /// The "wlock" must be held while calling this.
598 /// The "wlock" must be held while calling this.
590 /// See for example `try_with_wlock_no_wait`.
599 /// See for example `try_with_wlock_no_wait`.
591 ///
600 ///
592 /// TODO: have a `WritableRepo` type only accessible while holding the
601 /// TODO: have a `WritableRepo` type only accessible while holding the
593 /// lock?
602 /// lock?
594 pub fn write_dirstate(&self) -> Result<(), DirstateError> {
603 pub fn write_dirstate(&self) -> Result<(), DirstateError> {
595 let map = self.dirstate_map()?;
604 let map = self.dirstate_map()?;
596 // TODO: Maintain a `DirstateMap::dirty` flag, and return early here if
605 // TODO: Maintain a `DirstateMap::dirty` flag, and return early here if
597 // it’s unset
606 // it’s unset
598 let parents = self.dirstate_parents()?;
607 let parents = self.dirstate_parents()?;
599 let (packed_dirstate, old_uuid_to_remove) = if self.use_dirstate_v2() {
608 let (packed_dirstate, old_uuid_to_remove) = if self.use_dirstate_v2() {
600 let (identity, uuid, data_size) =
609 let (identity, uuid, data_size) =
601 self.get_dirstate_data_file_integrity()?;
610 self.get_dirstate_data_file_integrity()?;
602 let identity_changed = identity != map.old_identity();
611 let identity_changed = identity != map.old_identity();
603 let uuid_changed = uuid.as_deref() != map.old_uuid();
612 let uuid_changed = uuid.as_deref() != map.old_uuid();
604 let data_length_changed = data_size != map.old_data_size();
613 let data_length_changed = data_size != map.old_data_size();
605
614
606 if identity_changed || uuid_changed || data_length_changed {
615 if identity_changed || uuid_changed || data_length_changed {
607 // If any of identity, uuid or length have changed since
616 // If any of identity, uuid or length have changed since
608 // last disk read, don't write.
617 // last disk read, don't write.
609 // This is fine because either we're in a command that doesn't
618 // This is fine because either we're in a command that doesn't
610 // write anything too important (like `hg status`), or we're in
619 // write anything too important (like `hg status`), or we're in
611 // `hg add` and we're supposed to have taken the lock before
620 // `hg add` and we're supposed to have taken the lock before
612 // reading anyway.
621 // reading anyway.
613 //
622 //
614 // TODO complain loudly if we've changed anything important
623 // TODO complain loudly if we've changed anything important
615 // without taking the lock.
624 // without taking the lock.
616 // (see `hg help config.format.use-dirstate-tracked-hint`)
625 // (see `hg help config.format.use-dirstate-tracked-hint`)
617 log::debug!(
626 log::debug!(
618 "dirstate has changed since last read, not updating."
627 "dirstate has changed since last read, not updating."
619 );
628 );
620 return Ok(());
629 return Ok(());
621 }
630 }
622
631
623 let uuid_opt = map.old_uuid();
632 let uuid_opt = map.old_uuid();
624 let write_mode = if uuid_opt.is_some() {
633 let write_mode = if uuid_opt.is_some() {
625 DirstateMapWriteMode::Auto
634 DirstateMapWriteMode::Auto
626 } else {
635 } else {
627 DirstateMapWriteMode::ForceNewDataFile
636 DirstateMapWriteMode::ForceNewDataFile
628 };
637 };
629 let (data, tree_metadata, append, old_data_size) =
638 let (data, tree_metadata, append, old_data_size) =
630 map.pack_v2(write_mode)?;
639 map.pack_v2(write_mode)?;
631
640
632 // Reuse the uuid, or generate a new one, keeping the old for
641 // Reuse the uuid, or generate a new one, keeping the old for
633 // deletion.
642 // deletion.
634 let (uuid, old_uuid) = match uuid_opt {
643 let (uuid, old_uuid) = match uuid_opt {
635 Some(uuid) => {
644 Some(uuid) => {
636 let as_str = std::str::from_utf8(uuid)
645 let as_str = std::str::from_utf8(uuid)
637 .map_err(|_| {
646 .map_err(|_| {
638 HgError::corrupted(
647 HgError::corrupted(
639 "non-UTF-8 dirstate data file ID",
648 "non-UTF-8 dirstate data file ID",
640 )
649 )
641 })?
650 })?
642 .to_owned();
651 .to_owned();
643 if append {
652 if append {
644 (as_str, None)
653 (as_str, None)
645 } else {
654 } else {
646 (DirstateDocket::new_uid(), Some(as_str))
655 (DirstateDocket::new_uid(), Some(as_str))
647 }
656 }
648 }
657 }
649 None => (DirstateDocket::new_uid(), None),
658 None => (DirstateDocket::new_uid(), None),
650 };
659 };
651
660
652 let data_filename = format!("dirstate.{}", uuid);
661 let data_filename = format!("dirstate.{}", uuid);
653 let data_filename = self.hg_vfs().join(data_filename);
662 let data_filename = self.hg_vfs().join(data_filename);
654 let mut options = std::fs::OpenOptions::new();
663 let mut options = std::fs::OpenOptions::new();
655 options.write(true);
664 options.write(true);
656
665
657 // Why are we not using the O_APPEND flag when appending?
666 // Why are we not using the O_APPEND flag when appending?
658 //
667 //
659 // - O_APPEND makes it trickier to deal with garbage at the end of
668 // - O_APPEND makes it trickier to deal with garbage at the end of
660 // the file, left by a previous uncommitted transaction. By
669 // the file, left by a previous uncommitted transaction. By
661 // starting the write at [old_data_size] we make sure we erase
670 // starting the write at [old_data_size] we make sure we erase
662 // all such garbage.
671 // all such garbage.
663 //
672 //
664 // - O_APPEND requires to special-case 0-byte writes, whereas we
673 // - O_APPEND requires to special-case 0-byte writes, whereas we
665 // don't need that.
674 // don't need that.
666 //
675 //
667 // - Some OSes have bugs in their implementation of O_APPEND:
676 // - Some OSes have bugs in their implementation of O_APPEND:
668 // revlog.py talks about a Solaris bug, but we also saw some ZFS
677 // revlog.py talks about a Solaris bug, but we also saw some ZFS
669 // bug: https://github.com/openzfs/zfs/pull/3124,
678 // bug: https://github.com/openzfs/zfs/pull/3124,
670 // https://github.com/openzfs/zfs/issues/13370
679 // https://github.com/openzfs/zfs/issues/13370
671 //
680 //
672 if !append {
681 if !append {
673 log::trace!("creating a new dirstate data file");
682 log::trace!("creating a new dirstate data file");
674 options.create_new(true);
683 options.create_new(true);
675 } else {
684 } else {
676 log::trace!("appending to the dirstate data file");
685 log::trace!("appending to the dirstate data file");
677 }
686 }
678
687
679 let data_size = (|| {
688 let data_size = (|| {
680 // TODO: loop and try another random ID if !append and this
689 // TODO: loop and try another random ID if !append and this
681 // returns `ErrorKind::AlreadyExists`? Collision chance of two
690 // returns `ErrorKind::AlreadyExists`? Collision chance of two
682 // random IDs is one in 2**32
691 // random IDs is one in 2**32
683 let mut file = options.open(&data_filename)?;
692 let mut file = options.open(&data_filename)?;
684 if append {
693 if append {
685 file.seek(SeekFrom::Start(old_data_size as u64))?;
694 file.seek(SeekFrom::Start(old_data_size as u64))?;
686 }
695 }
687 file.write_all(&data)?;
696 file.write_all(&data)?;
688 file.flush()?;
697 file.flush()?;
689 file.stream_position()
698 file.stream_position()
690 })()
699 })()
691 .when_writing_file(&data_filename)?;
700 .when_writing_file(&data_filename)?;
692
701
693 let packed_dirstate = DirstateDocket::serialize(
702 let packed_dirstate = DirstateDocket::serialize(
694 parents,
703 parents,
695 tree_metadata,
704 tree_metadata,
696 data_size,
705 data_size,
697 uuid.as_bytes(),
706 uuid.as_bytes(),
698 )
707 )
699 .map_err(|_: std::num::TryFromIntError| {
708 .map_err(|_: std::num::TryFromIntError| {
700 HgError::corrupted("overflow in dirstate docket serialization")
709 HgError::corrupted("overflow in dirstate docket serialization")
701 })?;
710 })?;
702
711
703 (packed_dirstate, old_uuid)
712 (packed_dirstate, old_uuid)
704 } else {
713 } else {
705 let identity = self.dirstate_identity()?;
714 let identity = self.dirstate_identity()?;
706 if identity != map.old_identity() {
715 if identity != map.old_identity() {
707 // If identity changed since last disk read, don't write.
716 // If identity changed since last disk read, don't write.
708 // This is fine because either we're in a command that doesn't
717 // This is fine because either we're in a command that doesn't
709 // write anything too important (like `hg status`), or we're in
718 // write anything too important (like `hg status`), or we're in
710 // `hg add` and we're supposed to have taken the lock before
719 // `hg add` and we're supposed to have taken the lock before
711 // reading anyway.
720 // reading anyway.
712 //
721 //
713 // TODO complain loudly if we've changed anything important
722 // TODO complain loudly if we've changed anything important
714 // without taking the lock.
723 // without taking the lock.
715 // (see `hg help config.format.use-dirstate-tracked-hint`)
724 // (see `hg help config.format.use-dirstate-tracked-hint`)
716 log::debug!(
725 log::debug!(
717 "dirstate has changed since last read, not updating."
726 "dirstate has changed since last read, not updating."
718 );
727 );
719 return Ok(());
728 return Ok(());
720 }
729 }
721 (map.pack_v1(parents)?, None)
730 (map.pack_v1(parents)?, None)
722 };
731 };
723
732
724 let vfs = self.hg_vfs();
733 let vfs = self.hg_vfs();
725 vfs.atomic_write("dirstate", &packed_dirstate)?;
734 vfs.atomic_write("dirstate", &packed_dirstate)?;
726 if let Some(uuid) = old_uuid_to_remove {
735 if let Some(uuid) = old_uuid_to_remove {
727 // Remove the old data file after the new docket pointing to the
736 // Remove the old data file after the new docket pointing to the
728 // new data file was written.
737 // new data file was written.
729 vfs.remove_file(format!("dirstate.{}", uuid))?;
738 vfs.remove_file(format!("dirstate.{}", uuid))?;
730 }
739 }
731 Ok(())
740 Ok(())
732 }
741 }
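
The ordering that makes the v2 path of `write_dirstate` crash-safe is: the data file is written and flushed first, the docket (the only file readers consult to find the data) is swapped in atomically second, and the superseded data file is removed last. A sketch of that sequence under simplifying assumptions (plain `std::fs`, with a temp-file-plus-rename standing in for the vfs `atomic_write` helper):

    use std::fs;
    use std::io::Write;
    use std::path::Path;

    fn update_dirstate_v2(
        hg_dir: &Path,
        docket_bytes: &[u8],
        data_bytes: &[u8],
        new_uid: &str,
        old_uid: Option<&str>,
    ) -> std::io::Result<()> {
        // 1. Write the new data file under a fresh uid and flush it.
        let data_path = hg_dir.join(format!("dirstate.{}", new_uid));
        let mut data_file = fs::File::create(&data_path)?;
        data_file.write_all(data_bytes)?;
        data_file.flush()?;

        // 2. Atomically replace the docket so readers switch over in one
        //    step: either they see the old docket and old data, or the new
        //    docket and new data.
        let tmp = hg_dir.join("dirstate.tmp");
        fs::write(&tmp, docket_bytes)?;
        fs::rename(&tmp, hg_dir.join("dirstate"))?;

        // 3. Only now is the old data file unreferenced and safe to delete.
        if let Some(uid) = old_uid {
            fs::remove_file(hg_dir.join(format!("dirstate.{}", uid)))?;
        }
        Ok(())
    }
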
742
743 pub fn default_revlog_options(
744 &self,
745 changelog: bool,
746 ) -> Result<RevlogOpenOptions, HgError> {
747 let requirements = self.requirements();
748 let version = if changelog
749 && requirements.contains(CHANGELOGV2_REQUIREMENT)
750 {
751 let compute_rank = self
752 .config()
753 .get_bool(b"experimental", b"changelog-v2.compute-rank")?;
754 RevlogVersionOptions::ChangelogV2 { compute_rank }
755 } else if requirements.contains(REVLOGV2_REQUIREMENT) {
756 RevlogVersionOptions::V2
757 } else if requirements.contains(REVLOGV1_REQUIREMENT) {
758 RevlogVersionOptions::V1 {
759 generaldelta: requirements.contains(GENERALDELTA_REQUIREMENT),
760 }
761 } else {
762 RevlogVersionOptions::V0
763 };
764 Ok(RevlogOpenOptions {
765 version,
766 // We don't need to dance around the slow path like in the Python
767 // implementation since we know we have access to the fast code.
768 use_nodemap: requirements.contains(NODEMAP_REQUIREMENT),
769 })
770 }
733 }
771 }
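
`default_revlog_options` resolves the version by precedence: changelog-v2 (changelogs only), then revlog-v2, then revlog-v1 with `generaldelta` as a sub-flag, and finally the legacy v0. A compilable sketch of that decision using the requirement strings defined later in this series (the `compute-rank` config lookup is omitted for brevity):

    use std::collections::HashSet;

    #[derive(Debug, PartialEq)]
    enum Version {
        ChangelogV2,
        V2,
        V1 { generaldelta: bool },
        V0,
    }

    fn pick_version(reqs: &HashSet<&str>, changelog: bool) -> Version {
        if changelog && reqs.contains("exp-changelog-v2") {
            Version::ChangelogV2
        } else if reqs.contains("exp-revlogv2.1") {
            Version::V2
        } else if reqs.contains("revlogv1") {
            Version::V1 {
                generaldelta: reqs.contains("generaldelta"),
            }
        } else {
            Version::V0
        }
    }

    fn main() {
        let reqs = HashSet::from(["revlogv1", "generaldelta"]);
        assert_eq!(pick_version(&reqs, false), Version::V1 { generaldelta: true });
    }
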
734
772
735 /// Lazily-initialized component of `Repo` with interior mutability
773 /// Lazily-initialized component of `Repo` with interior mutability
736 ///
774 ///
737 /// This differs from `OnceCell` in that the value can still be "deinitialized"
775 /// This differs from `OnceCell` in that the value can still be "deinitialized"
738 /// later by setting its inner `Option` to `None`. It also takes the
776 /// later by setting its inner `Option` to `None`. It also takes the
739 /// initialization function as an argument when the value is requested, not
777 /// initialization function as an argument when the value is requested, not
740 /// when the instance is created.
778 /// when the instance is created.
741 struct LazyCell<T> {
779 struct LazyCell<T> {
742 value: RefCell<Option<T>>,
780 value: RefCell<Option<T>>,
743 }
781 }
744
782
745 impl<T> LazyCell<T> {
783 impl<T> LazyCell<T> {
746 fn new() -> Self {
784 fn new() -> Self {
747 Self {
785 Self {
748 value: RefCell::new(None),
786 value: RefCell::new(None),
749 }
787 }
750 }
788 }
751
789
752 fn set(&self, value: T) {
790 fn set(&self, value: T) {
753 *self.value.borrow_mut() = Some(value)
791 *self.value.borrow_mut() = Some(value)
754 }
792 }
755
793
756 fn get_or_init<E>(
794 fn get_or_init<E>(
757 &self,
795 &self,
758 init: impl Fn() -> Result<T, E>,
796 init: impl Fn() -> Result<T, E>,
759 ) -> Result<Ref<T>, E> {
797 ) -> Result<Ref<T>, E> {
760 let mut borrowed = self.value.borrow();
798 let mut borrowed = self.value.borrow();
761 if borrowed.is_none() {
799 if borrowed.is_none() {
762 drop(borrowed);
800 drop(borrowed);
763 // Only use `borrow_mut` if it is really needed to avoid panic in
801 // Only use `borrow_mut` if it is really needed to avoid panic in
764 // case there is another outstanding borrow but mutation is not
802 // case there is another outstanding borrow but mutation is not
765 // needed.
803 // needed.
766 *self.value.borrow_mut() = Some(init()?);
804 *self.value.borrow_mut() = Some(init()?);
767 borrowed = self.value.borrow()
805 borrowed = self.value.borrow()
768 }
806 }
769 Ok(Ref::map(borrowed, |option| option.as_ref().unwrap()))
807 Ok(Ref::map(borrowed, |option| option.as_ref().unwrap()))
770 }
808 }
771
809
772 fn get_mut_or_init<E>(
810 fn get_mut_or_init<E>(
773 &self,
811 &self,
774 init: impl Fn() -> Result<T, E>,
812 init: impl Fn() -> Result<T, E>,
775 ) -> Result<RefMut<T>, E> {
813 ) -> Result<RefMut<T>, E> {
776 let mut borrowed = self.value.borrow_mut();
814 let mut borrowed = self.value.borrow_mut();
777 if borrowed.is_none() {
815 if borrowed.is_none() {
778 *borrowed = Some(init()?);
816 *borrowed = Some(init()?);
779 }
817 }
780 Ok(RefMut::map(borrowed, |option| option.as_mut().unwrap()))
818 Ok(RefMut::map(borrowed, |option| option.as_mut().unwrap()))
781 }
819 }
782 }
820 }
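
A usage sketch for `LazyCell`, with a hypothetical payload type (the real callers are the `Repo` accessors above, within the same module): the init closure runs at most once, and later calls hand back the cached value without re-running it.

    struct Expensive(u32);

    fn demo(cell: &LazyCell<Expensive>) -> Result<(), String> {
        // First access runs the closure and caches the result...
        let first = cell.get_or_init(|| Ok::<_, String>(Expensive(42)))?.0;
        // ...subsequent accesses reuse it; this closure is never called.
        let second = cell.get_or_init(|| Ok::<_, String>(Expensive(0)))?.0;
        assert_eq!((first, second), (42, 42));
        Ok(())
    }
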
@@ -1,173 +1,183 b''
1 use crate::errors::{HgError, HgResultExt};
1 use crate::errors::{HgError, HgResultExt};
2 use crate::repo::Repo;
2 use crate::repo::Repo;
3 use crate::utils::join_display;
3 use crate::utils::join_display;
4 use crate::vfs::Vfs;
4 use crate::vfs::Vfs;
5 use std::collections::HashSet;
5 use std::collections::HashSet;
6
6
7 fn parse(bytes: &[u8]) -> Result<HashSet<String>, HgError> {
7 fn parse(bytes: &[u8]) -> Result<HashSet<String>, HgError> {
8 // The Python code reading this file uses `str.splitlines`
8 // The Python code reading this file uses `str.splitlines`
9 // which looks for a number of line separators (even including a couple of
9 // which looks for a number of line separators (even including a couple of
10 // non-ASCII ones), but the Python code writing it always uses `\n`.
10 // non-ASCII ones), but the Python code writing it always uses `\n`.
11 let lines = bytes.split(|&byte| byte == b'\n');
11 let lines = bytes.split(|&byte| byte == b'\n');
12
12
13 lines
13 lines
14 .filter(|line| !line.is_empty())
14 .filter(|line| !line.is_empty())
15 .map(|line| {
15 .map(|line| {
16 // Python uses Unicode `str.isalnum` but feature names are all
16 // Python uses Unicode `str.isalnum` but feature names are all
17 // ASCII
17 // ASCII
18 if line[0].is_ascii_alphanumeric() && line.is_ascii() {
18 if line[0].is_ascii_alphanumeric() && line.is_ascii() {
19 Ok(String::from_utf8(line.into()).unwrap())
19 Ok(String::from_utf8(line.into()).unwrap())
20 } else {
20 } else {
21 Err(HgError::corrupted("parse error in 'requires' file"))
21 Err(HgError::corrupted("parse error in 'requires' file"))
22 }
22 }
23 })
23 })
24 .collect()
24 .collect()
25 }
25 }
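
Concretely, `parse` accepts one ASCII feature name per `\n`-terminated line, skips blank lines, and treats anything else as corruption. An illustrative round-trip; this helper is not part of the patch and would live alongside `parse` in the same module:

    fn demo() -> Result<(), HgError> {
        let on_disk = b"revlogv1\nstore\nfncache\n\ndotencode\n";
        let features = parse(on_disk)?;
        assert_eq!(features.len(), 4);
        assert!(features.contains("store"));
        // A line that does not start with an ASCII alphanumeric byte is
        // reported as a parse error rather than ignored.
        assert!(parse(b"!bad\n").is_err());
        Ok(())
    }
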
26
26
27 pub(crate) fn load(hg_vfs: Vfs) -> Result<HashSet<String>, HgError> {
27 pub(crate) fn load(hg_vfs: Vfs) -> Result<HashSet<String>, HgError> {
28 parse(&hg_vfs.read("requires")?)
28 parse(&hg_vfs.read("requires")?)
29 }
29 }
30
30
31 pub(crate) fn load_if_exists(hg_vfs: Vfs) -> Result<HashSet<String>, HgError> {
31 pub(crate) fn load_if_exists(hg_vfs: Vfs) -> Result<HashSet<String>, HgError> {
32 if let Some(bytes) = hg_vfs.read("requires").io_not_found_as_none()? {
32 if let Some(bytes) = hg_vfs.read("requires").io_not_found_as_none()? {
33 parse(&bytes)
33 parse(&bytes)
34 } else {
34 } else {
35 // Treat a missing file the same as an empty file.
35 // Treat a missing file the same as an empty file.
36 // From `mercurial/localrepo.py`:
36 // From `mercurial/localrepo.py`:
37 // > requires file contains a newline-delimited list of
37 // > requires file contains a newline-delimited list of
38 // > features/capabilities the opener (us) must have in order to use
38 // > features/capabilities the opener (us) must have in order to use
39 // > the repository. This file was introduced in Mercurial 0.9.2,
39 // > the repository. This file was introduced in Mercurial 0.9.2,
40 // > which means very old repositories may not have one. We assume
40 // > which means very old repositories may not have one. We assume
41 // > a missing file translates to no requirements.
41 // > a missing file translates to no requirements.
42 Ok(HashSet::new())
42 Ok(HashSet::new())
43 }
43 }
44 }
44 }
45
45
46 pub(crate) fn check(repo: &Repo) -> Result<(), HgError> {
46 pub(crate) fn check(repo: &Repo) -> Result<(), HgError> {
47 let unknown: Vec<_> = repo
47 let unknown: Vec<_> = repo
48 .requirements()
48 .requirements()
49 .iter()
49 .iter()
50 .map(String::as_str)
50 .map(String::as_str)
51 // .filter(|feature| !ALL_SUPPORTED.contains(feature.as_str()))
51 // .filter(|feature| !ALL_SUPPORTED.contains(feature.as_str()))
52 .filter(|feature| {
52 .filter(|feature| {
53 !REQUIRED.contains(feature) && !SUPPORTED.contains(feature)
53 !REQUIRED.contains(feature) && !SUPPORTED.contains(feature)
54 })
54 })
55 .collect();
55 .collect();
56 if !unknown.is_empty() {
56 if !unknown.is_empty() {
57 return Err(HgError::unsupported(format!(
57 return Err(HgError::unsupported(format!(
58 "repository requires feature unknown to this Mercurial: {}",
58 "repository requires feature unknown to this Mercurial: {}",
59 join_display(&unknown, ", ")
59 join_display(&unknown, ", ")
60 )));
60 )));
61 }
61 }
62 let missing: Vec<_> = REQUIRED
62 let missing: Vec<_> = REQUIRED
63 .iter()
63 .iter()
64 .filter(|&&feature| !repo.requirements().contains(feature))
64 .filter(|&&feature| !repo.requirements().contains(feature))
65 .collect();
65 .collect();
66 if !missing.is_empty() {
66 if !missing.is_empty() {
67 return Err(HgError::unsupported(format!(
67 return Err(HgError::unsupported(format!(
68 "repository is missing feature required by this Mercurial: {}",
68 "repository is missing feature required by this Mercurial: {}",
69 join_display(&missing, ", ")
69 join_display(&missing, ", ")
70 )));
70 )));
71 }
71 }
72 Ok(())
72 Ok(())
73 }
73 }
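
`check` enforces two independent conditions: the repository must not name a feature this build has never heard of, and it must have every feature this build requires. The same logic restated on plain sets (an illustrative helper, not part of the patch):

    use std::collections::HashSet;

    fn classify(
        repo_reqs: &HashSet<String>,
        supported: &[&str],
        required: &[&str],
    ) -> (Vec<String>, Vec<String>) {
        // Features the repository has that this build does not understand.
        let unknown = repo_reqs
            .iter()
            .filter(|f| {
                !supported.contains(&f.as_str())
                    && !required.contains(&f.as_str())
            })
            .cloned()
            .collect();
        // Features this build insists on that the repository lacks.
        let missing = required
            .iter()
            .filter(|f| !repo_reqs.contains(**f))
            .map(|f| f.to_string())
            .collect();
        (unknown, missing)
    }
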
74
74
75 /// rhg does not support repositories that are *missing* any of these features
75 /// rhg does not support repositories that are *missing* any of these features
76 const REQUIRED: &[&str] = &["revlogv1", "store", "fncache", "dotencode"];
76 const REQUIRED: &[&str] = &["revlogv1", "store", "fncache", "dotencode"];
77
77
78 /// rhg supports repository with or without these
78 /// rhg supports repository with or without these
79 const SUPPORTED: &[&str] = &[
79 const SUPPORTED: &[&str] = &[
80 "generaldelta",
80 GENERALDELTA_REQUIREMENT,
81 SHARED_REQUIREMENT,
81 SHARED_REQUIREMENT,
82 SHARESAFE_REQUIREMENT,
82 SHARESAFE_REQUIREMENT,
83 SPARSEREVLOG_REQUIREMENT,
83 SPARSEREVLOG_REQUIREMENT,
84 RELATIVE_SHARED_REQUIREMENT,
84 RELATIVE_SHARED_REQUIREMENT,
85 REVLOG_COMPRESSION_ZSTD,
85 REVLOG_COMPRESSION_ZSTD,
86 DIRSTATE_V2_REQUIREMENT,
86 DIRSTATE_V2_REQUIREMENT,
87 DIRSTATE_TRACKED_HINT_V1,
87 DIRSTATE_TRACKED_HINT_V1,
88 // As of this writing everything rhg does is read-only.
88 // As of this writing everything rhg does is read-only.
89 // When it starts writing to the repository, it’ll need to either keep the
89 // When it starts writing to the repository, it’ll need to either keep the
90 // persistent nodemap up to date or remove this entry:
90 // persistent nodemap up to date or remove this entry:
91 NODEMAP_REQUIREMENT,
91 NODEMAP_REQUIREMENT,
92 // Not all commands support `sparse` and `narrow`. The commands that do
92 // Not all commands support `sparse` and `narrow`. The commands that do
93 // not should opt out by checking `has_sparse` and `has_narrow`.
93 // not should opt out by checking `has_sparse` and `has_narrow`.
94 SPARSE_REQUIREMENT,
94 SPARSE_REQUIREMENT,
95 NARROW_REQUIREMENT,
95 NARROW_REQUIREMENT,
96 // rhg doesn't care about bookmarks at all yet
96 // rhg doesn't care about bookmarks at all yet
97 BOOKMARKS_IN_STORE_REQUIREMENT,
97 BOOKMARKS_IN_STORE_REQUIREMENT,
98 ];
98 ];
99
99
100 // Copied from mercurial/requirements.py:
100 // Copied from mercurial/requirements.py:
101
101
102 pub const DIRSTATE_V2_REQUIREMENT: &str = "dirstate-v2";
102 pub const DIRSTATE_V2_REQUIREMENT: &str = "dirstate-v2";
103 pub const GENERALDELTA_REQUIREMENT: &str = "generaldelta";
103
104
104 /// A repository that uses the tracked hint dirstate file
105 /// A repository that uses the tracked hint dirstate file
105 #[allow(unused)]
106 #[allow(unused)]
106 pub const DIRSTATE_TRACKED_HINT_V1: &str = "dirstate-tracked-key-v1";
107 pub const DIRSTATE_TRACKED_HINT_V1: &str = "dirstate-tracked-key-v1";
107
108
108 /// When narrowing is finalized and no longer subject to format changes,
109 /// When narrowing is finalized and no longer subject to format changes,
109 /// we should move this to just "narrow" or similar.
110 /// we should move this to just "narrow" or similar.
110 #[allow(unused)]
111 #[allow(unused)]
111 pub const NARROW_REQUIREMENT: &str = "narrowhg-experimental";
112 pub const NARROW_REQUIREMENT: &str = "narrowhg-experimental";
112
113
113 /// Bookmarks must be stored in the `store` part of the repository and will be
114 /// Bookmarks must be stored in the `store` part of the repository and will be
114 /// shared across shares
115 /// shared across shares
115 #[allow(unused)]
116 #[allow(unused)]
116 pub const BOOKMARKS_IN_STORE_REQUIREMENT: &str = "bookmarksinstore";
117 pub const BOOKMARKS_IN_STORE_REQUIREMENT: &str = "bookmarksinstore";
117
118
118 /// Enables sparse working directory usage
119 /// Enables sparse working directory usage
119 #[allow(unused)]
120 #[allow(unused)]
120 pub const SPARSE_REQUIREMENT: &str = "exp-sparse";
121 pub const SPARSE_REQUIREMENT: &str = "exp-sparse";
121
122
122 /// Enables the internal phase which is used to hide changesets instead
123 /// Enables the internal phase which is used to hide changesets instead
123 /// of stripping them
124 /// of stripping them
124 #[allow(unused)]
125 #[allow(unused)]
125 pub const INTERNAL_PHASE_REQUIREMENT: &str = "internal-phase";
126 pub const INTERNAL_PHASE_REQUIREMENT: &str = "internal-phase";
126
127
127 /// Stores manifest in Tree structure
128 /// Stores manifest in Tree structure
128 #[allow(unused)]
129 #[allow(unused)]
129 pub const TREEMANIFEST_REQUIREMENT: &str = "treemanifest";
130 pub const TREEMANIFEST_REQUIREMENT: &str = "treemanifest";
130
131
132 /// Whether to use "RevlogNG", i.e. version 1 of the revlog format
133 #[allow(unused)]
134 pub const REVLOGV1_REQUIREMENT: &str = "revlogv1";
135
131 /// Increment the sub-version when the revlog v2 format changes to lock out old
136 /// Increment the sub-version when the revlog v2 format changes to lock out old
132 /// clients.
137 /// clients.
133 #[allow(unused)]
138 #[allow(unused)]
134 pub const REVLOGV2_REQUIREMENT: &str = "exp-revlogv2.1";
139 pub const REVLOGV2_REQUIREMENT: &str = "exp-revlogv2.1";
135
140
141 /// Increment the sub-version when the changelog v2 format changes to lock
142 /// out old clients.
143 #[allow(unused)]
144 pub const CHANGELOGV2_REQUIREMENT: &str = "exp-changelog-v2";
145
136 /// A repository with the sparserevlog feature will have delta chains that
146 /// A repository with the sparserevlog feature will have delta chains that
137 /// can spread over a larger span. Sparse reading cuts these large spans into
147 /// can spread over a larger span. Sparse reading cuts these large spans into
138 /// pieces, so that each piece isn't too big.
148 /// pieces, so that each piece isn't too big.
139 /// Without the sparserevlog capability, reading from the repository could use
149 /// Without the sparserevlog capability, reading from the repository could use
140 /// huge amounts of memory, because the whole span would be read at once,
150 /// huge amounts of memory, because the whole span would be read at once,
141 /// including all the intermediate revisions that aren't pertinent for the
151 /// including all the intermediate revisions that aren't pertinent for the
142 /// chain. This is why once a repository has enabled sparse-read, it becomes
152 /// chain. This is why once a repository has enabled sparse-read, it becomes
143 /// required.
153 /// required.
144 #[allow(unused)]
154 #[allow(unused)]
145 pub const SPARSEREVLOG_REQUIREMENT: &str = "sparserevlog";
155 pub const SPARSEREVLOG_REQUIREMENT: &str = "sparserevlog";
146
156
147 /// A repository with the copies-sidedata-changeset requirement will store
157 /// A repository with the copies-sidedata-changeset requirement will store
148 /// copies related information in changeset's sidedata.
158 /// copies related information in changeset's sidedata.
149 #[allow(unused)]
159 #[allow(unused)]
150 pub const COPIESSDC_REQUIREMENT: &str = "exp-copies-sidedata-changeset";
160 pub const COPIESSDC_REQUIREMENT: &str = "exp-copies-sidedata-changeset";
151
161
152 /// The repository uses a persistent nodemap for the changelog and the manifest.
162 /// The repository uses a persistent nodemap for the changelog and the manifest.
153 #[allow(unused)]
163 #[allow(unused)]
154 pub const NODEMAP_REQUIREMENT: &str = "persistent-nodemap";
164 pub const NODEMAP_REQUIREMENT: &str = "persistent-nodemap";
155
165
156 /// Denotes that the current repository is a share
166 /// Denotes that the current repository is a share
157 #[allow(unused)]
167 #[allow(unused)]
158 pub const SHARED_REQUIREMENT: &str = "shared";
168 pub const SHARED_REQUIREMENT: &str = "shared";
159
169
160 /// Denotes that current repository is a share and the shared source path is
170 /// Denotes that current repository is a share and the shared source path is
161 /// relative to the current repository root path
171 /// relative to the current repository root path
162 #[allow(unused)]
172 #[allow(unused)]
163 pub const RELATIVE_SHARED_REQUIREMENT: &str = "relshared";
173 pub const RELATIVE_SHARED_REQUIREMENT: &str = "relshared";
164
174
165 /// A repository with share implemented safely. The repository has different
175 /// A repository with share implemented safely. The repository has different
166 /// store and working copy requirements, i.e. both `.hg/requires` and
176 /// store and working copy requirements, i.e. both `.hg/requires` and
167 /// `.hg/store/requires` are present.
177 /// `.hg/store/requires` are present.
168 #[allow(unused)]
178 #[allow(unused)]
169 pub const SHARESAFE_REQUIREMENT: &str = "share-safe";
179 pub const SHARESAFE_REQUIREMENT: &str = "share-safe";
170
180
171 /// A repository that uses zstd compression inside its revlog
181 /// A repository that uses zstd compression inside its revlog
172 #[allow(unused)]
182 #[allow(unused)]
173 pub const REVLOG_COMPRESSION_ZSTD: &str = "revlog-compression-zstd";
183 pub const REVLOG_COMPRESSION_ZSTD: &str = "revlog-compression-zstd";
@@ -1,359 +1,363 b''
1 use crate::errors::HgError;
1 use crate::errors::HgError;
2 use crate::revlog::Revision;
2 use crate::revlog::Revision;
3 use crate::revlog::{Node, NodePrefix};
3 use crate::revlog::{Node, NodePrefix};
4 use crate::revlog::{Revlog, RevlogEntry, RevlogError};
4 use crate::revlog::{Revlog, RevlogEntry, RevlogError};
5 use crate::utils::hg_path::HgPath;
5 use crate::utils::hg_path::HgPath;
6 use crate::vfs::Vfs;
6 use crate::vfs::Vfs;
7 use crate::{Graph, GraphError, UncheckedRevision};
7 use crate::{Graph, GraphError, RevlogOpenOptions, UncheckedRevision};
8 use itertools::Itertools;
8 use itertools::Itertools;
9 use std::ascii::escape_default;
9 use std::ascii::escape_default;
10 use std::borrow::Cow;
10 use std::borrow::Cow;
11 use std::fmt::{Debug, Formatter};
11 use std::fmt::{Debug, Formatter};
12
12
13 /// A specialized `Revlog` to work with changelog data format.
13 /// A specialized `Revlog` to work with changelog data format.
14 pub struct Changelog {
14 pub struct Changelog {
15 /// The generic `revlog` format.
15 /// The generic `revlog` format.
16 pub(crate) revlog: Revlog,
16 pub(crate) revlog: Revlog,
17 }
17 }
18
18
19 impl Changelog {
19 impl Changelog {
20 /// Open the `changelog` of a repository given by its root.
20 /// Open the `changelog` of a repository given by its root.
21 pub fn open(store_vfs: &Vfs, use_nodemap: bool) -> Result<Self, HgError> {
21 pub fn open(
22 let revlog =
22 store_vfs: &Vfs,
23 Revlog::open(store_vfs, "00changelog.i", None, use_nodemap)?;
23 options: RevlogOpenOptions,
24 ) -> Result<Self, HgError> {
25 let revlog = Revlog::open(store_vfs, "00changelog.i", None, options)?;
24 Ok(Self { revlog })
26 Ok(Self { revlog })
25 }
27 }
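
With this signature change, every caller threads repository-derived options into `open`. The wiring in `Repo::new_changelog` earlier in this diff boils down to the following sketch (imports elided; `true` asks for changelog-specific options such as changelog-v2):

    fn open_changelog(repo: &Repo) -> Result<Changelog, HgError> {
        let options = repo.default_revlog_options(true)?;
        Changelog::open(&repo.store_vfs(), options)
    }
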
26
28
27 /// Return the `ChangelogRevisionData` for the given node ID.
29 /// Return the `ChangelogRevisionData` for the given node ID.
28 pub fn data_for_node(
30 pub fn data_for_node(
29 &self,
31 &self,
30 node: NodePrefix,
32 node: NodePrefix,
31 ) -> Result<ChangelogRevisionData, RevlogError> {
33 ) -> Result<ChangelogRevisionData, RevlogError> {
32 let rev = self.revlog.rev_from_node(node)?;
34 let rev = self.revlog.rev_from_node(node)?;
33 self.entry_for_checked_rev(rev)?.data()
35 self.entry_for_checked_rev(rev)?.data()
34 }
36 }
35
37
36 /// Return the [`ChangelogEntry`] for the given revision number.
38 /// Return the [`ChangelogEntry`] for the given revision number.
37 pub fn entry_for_rev(
39 pub fn entry_for_rev(
38 &self,
40 &self,
39 rev: UncheckedRevision,
41 rev: UncheckedRevision,
40 ) -> Result<ChangelogEntry, RevlogError> {
42 ) -> Result<ChangelogEntry, RevlogError> {
41 let revlog_entry = self.revlog.get_entry(rev)?;
43 let revlog_entry = self.revlog.get_entry(rev)?;
42 Ok(ChangelogEntry { revlog_entry })
44 Ok(ChangelogEntry { revlog_entry })
43 }
45 }
44
46
45 /// Same as [`Self::entry_for_rev`] for checked revisions.
47 /// Same as [`Self::entry_for_rev`] for checked revisions.
46 fn entry_for_checked_rev(
48 fn entry_for_checked_rev(
47 &self,
49 &self,
48 rev: Revision,
50 rev: Revision,
49 ) -> Result<ChangelogEntry, RevlogError> {
51 ) -> Result<ChangelogEntry, RevlogError> {
50 let revlog_entry = self.revlog.get_entry_for_checked_rev(rev)?;
52 let revlog_entry = self.revlog.get_entry_for_checked_rev(rev)?;
51 Ok(ChangelogEntry { revlog_entry })
53 Ok(ChangelogEntry { revlog_entry })
52 }
54 }
53
55
54 /// Return the [`ChangelogRevisionData`] for the given revision number.
56 /// Return the [`ChangelogRevisionData`] for the given revision number.
55 ///
57 ///
56 /// This is a useful shortcut in case the caller does not need the
58 /// This is a useful shortcut in case the caller does not need the
57 /// generic revlog information (parents, hashes etc). Otherwise
59 /// generic revlog information (parents, hashes etc). Otherwise
58 /// consider taking a [`ChangelogEntry`] with
60 /// consider taking a [`ChangelogEntry`] with
59 /// [entry_for_rev](`Self::entry_for_rev`) and doing everything from there.
61 /// [entry_for_rev](`Self::entry_for_rev`) and doing everything from there.
60 pub fn data_for_rev(
62 pub fn data_for_rev(
61 &self,
63 &self,
62 rev: UncheckedRevision,
64 rev: UncheckedRevision,
63 ) -> Result<ChangelogRevisionData, RevlogError> {
65 ) -> Result<ChangelogRevisionData, RevlogError> {
64 self.entry_for_rev(rev)?.data()
66 self.entry_for_rev(rev)?.data()
65 }
67 }
66
68
67 pub fn node_from_rev(&self, rev: UncheckedRevision) -> Option<&Node> {
69 pub fn node_from_rev(&self, rev: UncheckedRevision) -> Option<&Node> {
68 self.revlog.node_from_rev(rev)
70 self.revlog.node_from_rev(rev)
69 }
71 }
70
72
71 pub fn rev_from_node(
73 pub fn rev_from_node(
72 &self,
74 &self,
73 node: NodePrefix,
75 node: NodePrefix,
74 ) -> Result<Revision, RevlogError> {
76 ) -> Result<Revision, RevlogError> {
75 self.revlog.rev_from_node(node)
77 self.revlog.rev_from_node(node)
76 }
78 }
77 }
79 }
78
80
79 impl Graph for Changelog {
81 impl Graph for Changelog {
80 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
82 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
81 self.revlog.parents(rev)
83 self.revlog.parents(rev)
82 }
84 }
83 }
85 }
84
86
85 /// A specialized `RevlogEntry` for `changelog` data format
87 /// A specialized `RevlogEntry` for `changelog` data format
86 ///
88 ///
87 /// This is a `RevlogEntry` with the added semantics that the associated
89 /// This is a `RevlogEntry` with the added semantics that the associated
88 /// data should meet the requirements for `changelog`, materialized by
90 /// data should meet the requirements for `changelog`, materialized by
89 /// the fact that `data()` constructs a `ChangelogRevisionData`.
91 /// the fact that `data()` constructs a `ChangelogRevisionData`.
90 /// In case that promise would be broken, the `data` method returns an error.
92 /// In case that promise would be broken, the `data` method returns an error.
91 #[derive(Clone)]
93 #[derive(Clone)]
92 pub struct ChangelogEntry<'changelog> {
94 pub struct ChangelogEntry<'changelog> {
93 /// Same data, as a generic `RevlogEntry`.
95 /// Same data, as a generic `RevlogEntry`.
94 pub(crate) revlog_entry: RevlogEntry<'changelog>,
96 pub(crate) revlog_entry: RevlogEntry<'changelog>,
95 }
97 }
96
98
97 impl<'changelog> ChangelogEntry<'changelog> {
99 impl<'changelog> ChangelogEntry<'changelog> {
98 pub fn data<'a>(
100 pub fn data<'a>(
99 &'a self,
101 &'a self,
100 ) -> Result<ChangelogRevisionData<'changelog>, RevlogError> {
102 ) -> Result<ChangelogRevisionData<'changelog>, RevlogError> {
101 let bytes = self.revlog_entry.data()?;
103 let bytes = self.revlog_entry.data()?;
102 if bytes.is_empty() {
104 if bytes.is_empty() {
103 Ok(ChangelogRevisionData::null())
105 Ok(ChangelogRevisionData::null())
104 } else {
106 } else {
105 Ok(ChangelogRevisionData::new(bytes).map_err(|err| {
107 Ok(ChangelogRevisionData::new(bytes).map_err(|err| {
106 RevlogError::Other(HgError::CorruptedRepository(format!(
108 RevlogError::Other(HgError::CorruptedRepository(format!(
107 "Invalid changelog data for revision {}: {:?}",
109 "Invalid changelog data for revision {}: {:?}",
108 self.revlog_entry.revision(),
110 self.revlog_entry.revision(),
109 err
111 err
110 )))
112 )))
111 })?)
113 })?)
112 }
114 }
113 }
115 }
114
116
115 /// Obtain a reference to the underlying `RevlogEntry`.
117 /// Obtain a reference to the underlying `RevlogEntry`.
116 ///
118 ///
117 /// This allows the caller to access the information that is common
119 /// This allows the caller to access the information that is common
118 /// to all revlog entries: revision number, node id, parent revisions etc.
120 /// to all revlog entries: revision number, node id, parent revisions etc.
119 pub fn as_revlog_entry(&self) -> &RevlogEntry {
121 pub fn as_revlog_entry(&self) -> &RevlogEntry {
120 &self.revlog_entry
122 &self.revlog_entry
121 }
123 }
122
124
123 pub fn p1_entry(&self) -> Result<Option<ChangelogEntry>, RevlogError> {
125 pub fn p1_entry(&self) -> Result<Option<ChangelogEntry>, RevlogError> {
124 Ok(self
126 Ok(self
125 .revlog_entry
127 .revlog_entry
126 .p1_entry()?
128 .p1_entry()?
127 .map(|revlog_entry| Self { revlog_entry }))
129 .map(|revlog_entry| Self { revlog_entry }))
128 }
130 }
129
131
130 pub fn p2_entry(&self) -> Result<Option<ChangelogEntry>, RevlogError> {
132 pub fn p2_entry(&self) -> Result<Option<ChangelogEntry>, RevlogError> {
131 Ok(self
133 Ok(self
132 .revlog_entry
134 .revlog_entry
133 .p2_entry()?
135 .p2_entry()?
134 .map(|revlog_entry| Self { revlog_entry }))
136 .map(|revlog_entry| Self { revlog_entry }))
135 }
137 }
136 }
138 }
137
139
138 /// `Changelog` entry which knows how to interpret the `changelog` data bytes.
140 /// `Changelog` entry which knows how to interpret the `changelog` data bytes.
139 #[derive(PartialEq)]
141 #[derive(PartialEq)]
140 pub struct ChangelogRevisionData<'changelog> {
142 pub struct ChangelogRevisionData<'changelog> {
141 /// The data bytes of the `changelog` entry.
143 /// The data bytes of the `changelog` entry.
142 bytes: Cow<'changelog, [u8]>,
144 bytes: Cow<'changelog, [u8]>,
143 /// The end offset for the hex manifest (not including the newline)
145 /// The end offset for the hex manifest (not including the newline)
144 manifest_end: usize,
146 manifest_end: usize,
145 /// The end offset for the user+email (not including the newline)
147 /// The end offset for the user+email (not including the newline)
146 user_end: usize,
148 user_end: usize,
147 /// The end offset for the timestamp+timezone+extras (not including the
149 /// The end offset for the timestamp+timezone+extras (not including the
148 /// newline)
150 /// newline)
149 timestamp_end: usize,
151 timestamp_end: usize,
150 /// The end offset for the file list (not including the newline)
152 /// The end offset for the file list (not including the newline)
151 files_end: usize,
153 files_end: usize,
152 }
154 }
153
155
154 impl<'changelog> ChangelogRevisionData<'changelog> {
156 impl<'changelog> ChangelogRevisionData<'changelog> {
155 fn new(bytes: Cow<'changelog, [u8]>) -> Result<Self, HgError> {
157 fn new(bytes: Cow<'changelog, [u8]>) -> Result<Self, HgError> {
156 let mut line_iter = bytes.split(|b| b == &b'\n');
158 let mut line_iter = bytes.split(|b| b == &b'\n');
157 let manifest_end = line_iter
159 let manifest_end = line_iter
158 .next()
160 .next()
159 .expect("Empty iterator from split()?")
161 .expect("Empty iterator from split()?")
160 .len();
162 .len();
161 let user_slice = line_iter.next().ok_or_else(|| {
163 let user_slice = line_iter.next().ok_or_else(|| {
162 HgError::corrupted("Changeset data truncated after manifest line")
164 HgError::corrupted("Changeset data truncated after manifest line")
163 })?;
165 })?;
164 let user_end = manifest_end + 1 + user_slice.len();
166 let user_end = manifest_end + 1 + user_slice.len();
165 let timestamp_slice = line_iter.next().ok_or_else(|| {
167 let timestamp_slice = line_iter.next().ok_or_else(|| {
166 HgError::corrupted("Changeset data truncated after user line")
168 HgError::corrupted("Changeset data truncated after user line")
167 })?;
169 })?;
168 let timestamp_end = user_end + 1 + timestamp_slice.len();
170 let timestamp_end = user_end + 1 + timestamp_slice.len();
169 let mut files_end = timestamp_end + 1;
171 let mut files_end = timestamp_end + 1;
170 loop {
172 loop {
171 let line = line_iter.next().ok_or_else(|| {
173 let line = line_iter.next().ok_or_else(|| {
172 HgError::corrupted("Changeset data truncated in files list")
174 HgError::corrupted("Changeset data truncated in files list")
173 })?;
175 })?;
174 if line.is_empty() {
176 if line.is_empty() {
175 if files_end == bytes.len() {
177 if files_end == bytes.len() {
176 // The list of files ended with a single newline (there
178 // The list of files ended with a single newline (there
177 // should be two)
179 // should be two)
178 return Err(HgError::corrupted(
180 return Err(HgError::corrupted(
179 "Changeset data truncated after files list",
181 "Changeset data truncated after files list",
180 ));
182 ));
181 }
183 }
182 files_end -= 1;
184 files_end -= 1;
183 break;
185 break;
184 }
186 }
185 files_end += line.len() + 1;
187 files_end += line.len() + 1;
186 }
188 }
187
189
188 Ok(Self {
190 Ok(Self {
189 bytes,
191 bytes,
190 manifest_end,
192 manifest_end,
191 user_end,
193 user_end,
192 timestamp_end,
194 timestamp_end,
193 files_end,
195 files_end,
194 })
196 })
195 }
197 }
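
The offset arithmetic in `new` is easiest to follow on a concrete entry; hand-computed values for a minimal, hypothetical input (not from the test suite):

    // bytes = b"abcd\nuser <u@e>\n0 0\nfile1\nfile2\n\nmessage"
    //
    // manifest_end  = 4            // "abcd"
    // user_end      = 4 + 1 + 10   // = 15, "user <u@e>"
    // timestamp_end = 15 + 1 + 3   // = 19, "0 0"
    // files_end     = 31           // files slice = bytes[20..31]
    //                              //   = "file1\nfile2"
    // description() = &bytes[33..] // = "message" (files_end + 2 skips the
    //                              //   blank separator line)
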
196
198
197 fn null() -> Self {
199 fn null() -> Self {
198 Self::new(Cow::Borrowed(
200 Self::new(Cow::Borrowed(
199 b"0000000000000000000000000000000000000000\n\n0 0\n\n",
201 b"0000000000000000000000000000000000000000\n\n0 0\n\n",
200 ))
202 ))
201 .unwrap()
203 .unwrap()
202 }
204 }
203
205
204 /// Return an iterator over the lines of the entry.
206 /// Return an iterator over the lines of the entry.
205 pub fn lines(&self) -> impl Iterator<Item = &[u8]> {
207 pub fn lines(&self) -> impl Iterator<Item = &[u8]> {
206 self.bytes.split(|b| b == &b'\n')
208 self.bytes.split(|b| b == &b'\n')
207 }
209 }
208
210
209 /// Return the node id of the `manifest` referenced by this `changelog`
211 /// Return the node id of the `manifest` referenced by this `changelog`
210 /// entry.
212 /// entry.
211 pub fn manifest_node(&self) -> Result<Node, HgError> {
213 pub fn manifest_node(&self) -> Result<Node, HgError> {
212 let manifest_node_hex = &self.bytes[..self.manifest_end];
214 let manifest_node_hex = &self.bytes[..self.manifest_end];
213 Node::from_hex_for_repo(manifest_node_hex)
215 Node::from_hex_for_repo(manifest_node_hex)
214 }
216 }
215
217
216 /// The full user string (usually a name followed by an email enclosed in
218 /// The full user string (usually a name followed by an email enclosed in
217 /// angle brackets)
219 /// angle brackets)
218 pub fn user(&self) -> &[u8] {
220 pub fn user(&self) -> &[u8] {
219 &self.bytes[self.manifest_end + 1..self.user_end]
221 &self.bytes[self.manifest_end + 1..self.user_end]
220 }
222 }
221
223
222 /// The full timestamp line (timestamp in seconds, offset in seconds, and
224 /// The full timestamp line (timestamp in seconds, offset in seconds, and
223 /// possibly extras)
225 /// possibly extras)
224 // TODO: We should expose this in a more useful way
226 // TODO: We should expose this in a more useful way
225 pub fn timestamp_line(&self) -> &[u8] {
227 pub fn timestamp_line(&self) -> &[u8] {
226 &self.bytes[self.user_end + 1..self.timestamp_end]
228 &self.bytes[self.user_end + 1..self.timestamp_end]
227 }
229 }
228
230
229 /// The files changed in this revision.
231 /// The files changed in this revision.
230 pub fn files(&self) -> impl Iterator<Item = &HgPath> {
232 pub fn files(&self) -> impl Iterator<Item = &HgPath> {
231 self.bytes[self.timestamp_end + 1..self.files_end]
233 self.bytes[self.timestamp_end + 1..self.files_end]
232 .split(|b| b == &b'\n')
234 .split(|b| b == &b'\n')
233 .map(HgPath::new)
235 .map(HgPath::new)
234 }
236 }
235
237
236 /// The change description.
238 /// The change description.
237 pub fn description(&self) -> &[u8] {
239 pub fn description(&self) -> &[u8] {
238 &self.bytes[self.files_end + 2..]
240 &self.bytes[self.files_end + 2..]
239 }
241 }
240 }
242 }
241
243
242 impl Debug for ChangelogRevisionData<'_> {
244 impl Debug for ChangelogRevisionData<'_> {
243 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
245 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
244 f.debug_struct("ChangelogRevisionData")
246 f.debug_struct("ChangelogRevisionData")
245 .field("bytes", &debug_bytes(&self.bytes))
247 .field("bytes", &debug_bytes(&self.bytes))
246 .field("manifest", &debug_bytes(&self.bytes[..self.manifest_end]))
248 .field("manifest", &debug_bytes(&self.bytes[..self.manifest_end]))
247 .field(
249 .field(
248 "user",
250 "user",
249 &debug_bytes(
251 &debug_bytes(
250 &self.bytes[self.manifest_end + 1..self.user_end],
252 &self.bytes[self.manifest_end + 1..self.user_end],
251 ),
253 ),
252 )
254 )
253 .field(
255 .field(
254 "timestamp",
256 "timestamp",
255 &debug_bytes(
257 &debug_bytes(
256 &self.bytes[self.user_end + 1..self.timestamp_end],
258 &self.bytes[self.user_end + 1..self.timestamp_end],
257 ),
259 ),
258 )
260 )
259 .field(
261 .field(
260 "files",
262 "files",
261 &debug_bytes(
263 &debug_bytes(
262 &self.bytes[self.timestamp_end + 1..self.files_end],
264 &self.bytes[self.timestamp_end + 1..self.files_end],
263 ),
265 ),
264 )
266 )
265 .field(
267 .field(
266 "description",
268 "description",
267 &debug_bytes(&self.bytes[self.files_end + 2..]),
269 &debug_bytes(&self.bytes[self.files_end + 2..]),
268 )
270 )
269 .finish()
271 .finish()
270 }
272 }
271 }
273 }
272
274
273 fn debug_bytes(bytes: &[u8]) -> String {
275 fn debug_bytes(bytes: &[u8]) -> String {
274 String::from_utf8_lossy(
276 String::from_utf8_lossy(
275 &bytes.iter().flat_map(|b| escape_default(*b)).collect_vec(),
277 &bytes.iter().flat_map(|b| escape_default(*b)).collect_vec(),
276 )
278 )
277 .to_string()
279 .to_string()
278 }
280 }
279
281
280 #[cfg(test)]
282 #[cfg(test)]
281 mod tests {
283 mod tests {
282 use super::*;
284 use super::*;
283 use crate::vfs::Vfs;
285 use crate::vfs::Vfs;
284 use crate::NULL_REVISION;
286 use crate::NULL_REVISION;
285 use pretty_assertions::assert_eq;
287 use pretty_assertions::assert_eq;
286
288
287 #[test]
289 #[test]
288 fn test_create_changelogrevisiondata_invalid() {
290 fn test_create_changelogrevisiondata_invalid() {
289 // Completely empty
291 // Completely empty
290 assert!(ChangelogRevisionData::new(Cow::Borrowed(b"")).is_err());
292 assert!(ChangelogRevisionData::new(Cow::Borrowed(b"")).is_err());
291 // No newline after manifest
293 // No newline after manifest
292 assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd")).is_err());
294 assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd")).is_err());
293 // No newline after user
295 // No newline after user
294 assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd\n")).is_err());
296 assert!(ChangelogRevisionData::new(Cow::Borrowed(b"abcd\n")).is_err());
295 // No newline after timestamp
297 // No newline after timestamp
296 assert!(
298 assert!(
297 ChangelogRevisionData::new(Cow::Borrowed(b"abcd\n\n0 0")).is_err()
299 ChangelogRevisionData::new(Cow::Borrowed(b"abcd\n\n0 0")).is_err()
298 );
300 );
299 // Missing newline after files
301 // Missing newline after files
300 assert!(ChangelogRevisionData::new(Cow::Borrowed(
302 assert!(ChangelogRevisionData::new(Cow::Borrowed(
301 b"abcd\n\n0 0\nfile1\nfile2"
303 b"abcd\n\n0 0\nfile1\nfile2"
302 ))
304 ))
303 .is_err(),);
305 .is_err(),);
304 // Only one newline after files
306 // Only one newline after files
305 assert!(ChangelogRevisionData::new(Cow::Borrowed(
307 assert!(ChangelogRevisionData::new(Cow::Borrowed(
306 b"abcd\n\n0 0\nfile1\nfile2\n"
308 b"abcd\n\n0 0\nfile1\nfile2\n"
307 ))
309 ))
308 .is_err(),);
310 .is_err(),);
309 }
311 }
310
312
311 #[test]
313 #[test]
312 fn test_create_changelogrevisiondata() {
314 fn test_create_changelogrevisiondata() {
313 let data = ChangelogRevisionData::new(Cow::Borrowed(
315 let data = ChangelogRevisionData::new(Cow::Borrowed(
314 b"0123456789abcdef0123456789abcdef01234567
316 b"0123456789abcdef0123456789abcdef01234567
315 Some One <someone@example.com>
317 Some One <someone@example.com>
316 0 0
318 0 0
317 file1
319 file1
318 file2
320 file2
319
321
320 some
322 some
321 commit
323 commit
322 message",
324 message",
323 ))
325 ))
324 .unwrap();
326 .unwrap();
325 assert_eq!(
327 assert_eq!(
326 data.manifest_node().unwrap(),
328 data.manifest_node().unwrap(),
327 Node::from_hex("0123456789abcdef0123456789abcdef01234567")
329 Node::from_hex("0123456789abcdef0123456789abcdef01234567")
328 .unwrap()
330 .unwrap()
329 );
331 );
330 assert_eq!(data.user(), b"Some One <someone@example.com>");
332 assert_eq!(data.user(), b"Some One <someone@example.com>");
331 assert_eq!(data.timestamp_line(), b"0 0");
333 assert_eq!(data.timestamp_line(), b"0 0");
332 assert_eq!(
334 assert_eq!(
333 data.files().collect_vec(),
335 data.files().collect_vec(),
334 vec![HgPath::new("file1"), HgPath::new("file2")]
336 vec![HgPath::new("file1"), HgPath::new("file2")]
335 );
337 );
336 assert_eq!(data.description(), b"some\ncommit\nmessage");
338 assert_eq!(data.description(), b"some\ncommit\nmessage");
337 }
339 }
338
340
339 #[test]
341 #[test]
340 fn test_data_from_rev_null() -> Result<(), RevlogError> {
342 fn test_data_from_rev_null() -> Result<(), RevlogError> {
341 // an empty revlog will be enough for this case
343 // an empty revlog will be enough for this case
342 let temp = tempfile::tempdir().unwrap();
344 let temp = tempfile::tempdir().unwrap();
343 let vfs = Vfs { base: temp.path() };
345 let vfs = Vfs { base: temp.path() };
344 std::fs::write(temp.path().join("foo.i"), b"").unwrap();
346 std::fs::write(temp.path().join("foo.i"), b"").unwrap();
345 let revlog = Revlog::open(&vfs, "foo.i", None, false).unwrap();
347 let revlog =
348 Revlog::open(&vfs, "foo.i", None, RevlogOpenOptions::new())
349 .unwrap();
346
350
347 let changelog = Changelog { revlog };
351 let changelog = Changelog { revlog };
348 assert_eq!(
352 assert_eq!(
349 changelog.data_for_rev(NULL_REVISION.into())?,
353 changelog.data_for_rev(NULL_REVISION.into())?,
350 ChangelogRevisionData::null()
354 ChangelogRevisionData::null()
351 );
355 );
352 // same with the intermediate entry object
356 // same with the intermediate entry object
353 assert_eq!(
357 assert_eq!(
354 changelog.entry_for_rev(NULL_REVISION.into())?.data()?,
358 changelog.entry_for_rev(NULL_REVISION.into())?.data()?,
355 ChangelogRevisionData::null()
359 ChangelogRevisionData::null()
356 );
360 );
357 Ok(())
361 Ok(())
358 }
362 }
359 }
363 }
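
For reference, the byte layout that `ChangelogRevisionData::new` accepts can be reconstructed from the assertions above (the field names below are descriptive only, not API names):

    <manifest node, 40 hex characters>\n
    <user, e.g. "Some One <someone@example.com>">\n
    <timestamp line, e.g. "0 0">\n
    <changed file, one per line>\n
    \n
    <description, possibly spanning several lines>

Each failing case in `test_create_changelogrevisiondata_invalid` drops one of these separators, which is what the parser rejects.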
@@ -1,239 +1,245 @@
1 use crate::errors::HgError;
1 use crate::errors::HgError;
2 use crate::exit_codes;
2 use crate::exit_codes;
3 use crate::repo::Repo;
3 use crate::repo::Repo;
4 use crate::revlog::path_encode::path_encode;
4 use crate::revlog::path_encode::path_encode;
5 use crate::revlog::NodePrefix;
5 use crate::revlog::NodePrefix;
6 use crate::revlog::Revision;
6 use crate::revlog::Revision;
7 use crate::revlog::RevlogEntry;
7 use crate::revlog::RevlogEntry;
8 use crate::revlog::{Revlog, RevlogError};
8 use crate::revlog::{Revlog, RevlogError};
9 use crate::utils::files::get_path_from_bytes;
9 use crate::utils::files::get_path_from_bytes;
10 use crate::utils::hg_path::HgPath;
10 use crate::utils::hg_path::HgPath;
11 use crate::utils::SliceExt;
11 use crate::utils::SliceExt;
12 use crate::Graph;
12 use crate::Graph;
13 use crate::GraphError;
13 use crate::GraphError;
14 use crate::RevlogOpenOptions;
14 use crate::UncheckedRevision;
15 use crate::UncheckedRevision;
15 use std::path::PathBuf;
16 use std::path::PathBuf;
16
17
17 /// A specialized `Revlog` to work with file data logs.
18 /// A specialized `Revlog` to work with file data logs.
18 pub struct Filelog {
19 pub struct Filelog {
19 /// The generic `revlog` format.
20 /// The generic `revlog` format.
20 revlog: Revlog,
21 revlog: Revlog,
21 }
22 }
22
23
23 impl Graph for Filelog {
24 impl Graph for Filelog {
24 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
25 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
25 self.revlog.parents(rev)
26 self.revlog.parents(rev)
26 }
27 }
27 }
28 }
28
29
29 impl Filelog {
30 impl Filelog {
30 pub fn open_vfs(
31 pub fn open_vfs(
31 store_vfs: &crate::vfs::Vfs<'_>,
32 store_vfs: &crate::vfs::Vfs<'_>,
32 file_path: &HgPath,
33 file_path: &HgPath,
34 options: RevlogOpenOptions,
33 ) -> Result<Self, HgError> {
35 ) -> Result<Self, HgError> {
34 let index_path = store_path(file_path, b".i");
36 let index_path = store_path(file_path, b".i");
35 let data_path = store_path(file_path, b".d");
37 let data_path = store_path(file_path, b".d");
36 let revlog =
38 let revlog =
37 Revlog::open(store_vfs, index_path, Some(&data_path), false)?;
39 Revlog::open(store_vfs, index_path, Some(&data_path), options)?;
38 Ok(Self { revlog })
40 Ok(Self { revlog })
39 }
41 }
40
42
41 pub fn open(repo: &Repo, file_path: &HgPath) -> Result<Self, HgError> {
43 pub fn open(
42 Self::open_vfs(&repo.store_vfs(), file_path)
44 repo: &Repo,
45 file_path: &HgPath,
46 options: RevlogOpenOptions,
47 ) -> Result<Self, HgError> {
48 Self::open_vfs(&repo.store_vfs(), file_path, options)
43 }
49 }
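
A minimal usage sketch of the new signature, using only calls visible in this diff; the path and revision are invented for illustration, and a real caller would presumably derive its `RevlogOpenOptions` from the repository requirements rather than using `new()`:

    let filelog = Filelog::open(
        &repo,
        HgPath::new("some/file.txt"),
        RevlogOpenOptions::new(),
    )?;
    let data = filelog.data_for_rev(0.into())?;
    let contents: &[u8] = data.file_data()?;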
44
50
45 /// The given node ID is that of the file as found in a filelog, not of a
51 /// The given node ID is that of the file as found in a filelog, not of a
46 /// changeset.
52 /// changeset.
47 pub fn data_for_node(
53 pub fn data_for_node(
48 &self,
54 &self,
49 file_node: impl Into<NodePrefix>,
55 file_node: impl Into<NodePrefix>,
50 ) -> Result<FilelogRevisionData, RevlogError> {
56 ) -> Result<FilelogRevisionData, RevlogError> {
51 let file_rev = self.revlog.rev_from_node(file_node.into())?;
57 let file_rev = self.revlog.rev_from_node(file_node.into())?;
52 self.data_for_rev(file_rev.into())
58 self.data_for_rev(file_rev.into())
53 }
59 }
54
60
55 /// The given revision is that of the file as found in a filelog, not of a
61 /// The given revision is that of the file as found in a filelog, not of a
56 /// changeset.
62 /// changeset.
57 pub fn data_for_rev(
63 pub fn data_for_rev(
58 &self,
64 &self,
59 file_rev: UncheckedRevision,
65 file_rev: UncheckedRevision,
60 ) -> Result<FilelogRevisionData, RevlogError> {
66 ) -> Result<FilelogRevisionData, RevlogError> {
61 let data: Vec<u8> = self.revlog.get_rev_data(file_rev)?.into_owned();
67 let data: Vec<u8> = self.revlog.get_rev_data(file_rev)?.into_owned();
62 Ok(FilelogRevisionData(data))
68 Ok(FilelogRevisionData(data))
63 }
69 }
64
70
65 /// The given node ID is that of the file as found in a filelog, not of a
71 /// The given node ID is that of the file as found in a filelog, not of a
66 /// changeset.
72 /// changeset.
67 pub fn entry_for_node(
73 pub fn entry_for_node(
68 &self,
74 &self,
69 file_node: impl Into<NodePrefix>,
75 file_node: impl Into<NodePrefix>,
70 ) -> Result<FilelogEntry, RevlogError> {
76 ) -> Result<FilelogEntry, RevlogError> {
71 let file_rev = self.revlog.rev_from_node(file_node.into())?;
77 let file_rev = self.revlog.rev_from_node(file_node.into())?;
72 self.entry_for_checked_rev(file_rev)
78 self.entry_for_checked_rev(file_rev)
73 }
79 }
74
80
75 /// The given revision is that of the file as found in a filelog, not of a
81 /// The given revision is that of the file as found in a filelog, not of a
76 /// changeset.
82 /// changeset.
77 pub fn entry_for_rev(
83 pub fn entry_for_rev(
78 &self,
84 &self,
79 file_rev: UncheckedRevision,
85 file_rev: UncheckedRevision,
80 ) -> Result<FilelogEntry, RevlogError> {
86 ) -> Result<FilelogEntry, RevlogError> {
81 Ok(FilelogEntry(self.revlog.get_entry(file_rev)?))
87 Ok(FilelogEntry(self.revlog.get_entry(file_rev)?))
82 }
88 }
83
89
84 fn entry_for_checked_rev(
90 fn entry_for_checked_rev(
85 &self,
91 &self,
86 file_rev: Revision,
92 file_rev: Revision,
87 ) -> Result<FilelogEntry, RevlogError> {
93 ) -> Result<FilelogEntry, RevlogError> {
88 Ok(FilelogEntry(
94 Ok(FilelogEntry(
89 self.revlog.get_entry_for_checked_rev(file_rev)?,
95 self.revlog.get_entry_for_checked_rev(file_rev)?,
90 ))
96 ))
91 }
97 }
92 }
98 }
93
99
94 fn store_path(hg_path: &HgPath, suffix: &[u8]) -> PathBuf {
100 fn store_path(hg_path: &HgPath, suffix: &[u8]) -> PathBuf {
95 let encoded_bytes =
101 let encoded_bytes =
96 path_encode(&[b"data/", hg_path.as_bytes(), suffix].concat());
102 path_encode(&[b"data/", hg_path.as_bytes(), suffix].concat());
97 get_path_from_bytes(&encoded_bytes).into()
103 get_path_from_bytes(&encoded_bytes).into()
98 }
104 }
99
105
100 pub struct FilelogEntry<'a>(RevlogEntry<'a>);
106 pub struct FilelogEntry<'a>(RevlogEntry<'a>);
101
107
102 impl FilelogEntry<'_> {
108 impl FilelogEntry<'_> {
103 /// `self.data()` can be expensive, with decompression and delta
109 /// `self.data()` can be expensive, with decompression and delta
104 /// resolution.
110 /// resolution.
105 ///
111 ///
106 /// *Without* paying this cost, based on revlog index information
112 /// *Without* paying this cost, based on revlog index information
107 /// including `RevlogEntry::uncompressed_len`:
113 /// including `RevlogEntry::uncompressed_len`:
108 ///
114 ///
109 /// * Returns `true` if the length that `self.data().file_data().len()`
115 /// * Returns `true` if the length that `self.data().file_data().len()`
110 /// would return is definitely **not equal** to `other_len`.
116 /// would return is definitely **not equal** to `other_len`.
111 /// * Returns `false` if available information is inconclusive.
117 /// * Returns `false` if available information is inconclusive.
112 pub fn file_data_len_not_equal_to(&self, other_len: u64) -> bool {
118 pub fn file_data_len_not_equal_to(&self, other_len: u64) -> bool {
113 // Relevant code that implements this behavior on the Python side:
119 // Relevant code that implements this behavior on the Python side:
114 // basefilectx.cmp, filelog.size, storageutil.filerevisioncopied,
120 // basefilectx.cmp, filelog.size, storageutil.filerevisioncopied,
115 // revlog.size, revlog.rawsize
121 // revlog.size, revlog.rawsize
116
122
117 // Let’s call `file_data_len` what would be returned by
123 // Let’s call `file_data_len` what would be returned by
118 // `self.data().file_data().len()`.
124 // `self.data().file_data().len()`.
119
125
120 if self.0.is_censored() {
126 if self.0.is_censored() {
121 let file_data_len = 0;
127 let file_data_len = 0;
122 return other_len != file_data_len;
128 return other_len != file_data_len;
123 }
129 }
124
130
125 if self.0.has_length_affecting_flag_processor() {
131 if self.0.has_length_affecting_flag_processor() {
126 // We can’t conclude anything about `file_data_len`.
132 // We can’t conclude anything about `file_data_len`.
127 return false;
133 return false;
128 }
134 }
129
135
130 // Revlog revisions (usually) have metadata for the size of
136 // Revlog revisions (usually) have metadata for the size of
131 // their data after decompression and delta resolution
137 // their data after decompression and delta resolution
132 // as would be returned by `Revlog::get_rev_data`.
138 // as would be returned by `Revlog::get_rev_data`.
133 //
139 //
134 // For filelogs this is the file’s contents preceded by an optional
140 // For filelogs this is the file’s contents preceded by an optional
135 // metadata block.
141 // metadata block.
136 let uncompressed_len = if let Some(l) = self.0.uncompressed_len() {
142 let uncompressed_len = if let Some(l) = self.0.uncompressed_len() {
137 l as u64
143 l as u64
138 } else {
144 } else {
139 // The field was set to -1; the actual uncompressed len is unknown.
145 // The field was set to -1; the actual uncompressed len is unknown.
140 // We need to decompress to say more.
146 // We need to decompress to say more.
141 return false;
147 return false;
142 };
148 };
143 // `uncompressed_len = file_data_len + optional_metadata_len`,
149 // `uncompressed_len = file_data_len + optional_metadata_len`,
144 // so `file_data_len <= uncompressed_len`.
150 // so `file_data_len <= uncompressed_len`.
145 if uncompressed_len < other_len {
151 if uncompressed_len < other_len {
146 // Transitively, `file_data_len < other_len`.
152 // Transitively, `file_data_len < other_len`.
147 // So `other_len != file_data_len` definitely.
153 // So `other_len != file_data_len` definitely.
148 return true;
154 return true;
149 }
155 }
150
156
151 if uncompressed_len == other_len + 4 {
157 if uncompressed_len == other_len + 4 {
152 // It’s possible that `file_data_len == other_len` with an empty
158 // It’s possible that `file_data_len == other_len` with an empty
153 // metadata block (2 start marker bytes + 2 end marker bytes).
159 // metadata block (2 start marker bytes + 2 end marker bytes).
154 // This happens when there wouldn’t otherwise be metadata, but
160 // This happens when there wouldn’t otherwise be metadata, but
155 // the first 2 bytes of file data happen to match a start marker
161 // the first 2 bytes of file data happen to match a start marker
156 // and would be ambiguous.
162 // and would be ambiguous.
157 return false;
163 return false;
158 }
164 }
159
165
160 if !self.0.has_p1() {
166 if !self.0.has_p1() {
161 // There may or may not be copy metadata, so we can’t deduce more
167 // There may or may not be copy metadata, so we can’t deduce more
162 // about `file_data_len` without computing file data.
168 // about `file_data_len` without computing file data.
163 return false;
169 return false;
164 }
170 }
165
171
166 // Filelog ancestry is not meaningful in the way changelog ancestry is.
172 // Filelog ancestry is not meaningful in the way changelog ancestry is.
167 // It only provides hints to delta generation.
173 // It only provides hints to delta generation.
168 // p1 and p2 are set to null when making a copy or rename since
174 // p1 and p2 are set to null when making a copy or rename since
169 // contents are likely unrelated to what might have previously existed
175 // contents are likely unrelated to what might have previously existed
170 // at the destination path.
176 // at the destination path.
171 //
177 //
172 // Conversely, since here p1 is non-null, there is no copy metadata.
178 // Conversely, since here p1 is non-null, there is no copy metadata.
173 // Note that this reasoning may be invalidated in the presence of
179 // Note that this reasoning may be invalidated in the presence of
174 // merges made by some previous versions of Mercurial that
180 // merges made by some previous versions of Mercurial that
175 // swapped p1 and p2. See <https://bz.mercurial-scm.org/show_bug.cgi?id=6528>
181 // swapped p1 and p2. See <https://bz.mercurial-scm.org/show_bug.cgi?id=6528>
176 // and `tests/test-issue6528.t`.
182 // and `tests/test-issue6528.t`.
177 //
183 //
178 // Since copy metadata is currently the only kind of metadata
184 // Since copy metadata is currently the only kind of metadata
179 // kept in revlog data of filelogs,
185 // kept in revlog data of filelogs,
180 // this `FilelogEntry` does not have such metadata:
186 // this `FilelogEntry` does not have such metadata:
181 let file_data_len = uncompressed_len;
187 let file_data_len = uncompressed_len;
182
188
183 file_data_len != other_len
189 file_data_len != other_len
184 }
190 }
185
191
186 pub fn data(&self) -> Result<FilelogRevisionData, HgError> {
192 pub fn data(&self) -> Result<FilelogRevisionData, HgError> {
187 let data = self.0.data();
193 let data = self.0.data();
188 match data {
194 match data {
189 Ok(data) => Ok(FilelogRevisionData(data.into_owned())),
195 Ok(data) => Ok(FilelogRevisionData(data.into_owned())),
190 // Errors other than `HgError` should not happen at this point
196 // Errors other than `HgError` should not happen at this point
191 Err(e) => match e {
197 Err(e) => match e {
192 RevlogError::Other(hg_error) => Err(hg_error),
198 RevlogError::Other(hg_error) => Err(hg_error),
193 revlog_error => Err(HgError::abort(
199 revlog_error => Err(HgError::abort(
194 revlog_error.to_string(),
200 revlog_error.to_string(),
195 exit_codes::ABORT,
201 exit_codes::ABORT,
196 None,
202 None,
197 )),
203 )),
198 },
204 },
199 }
205 }
200 }
206 }
201 }
207 }
202
208
203 /// The data for one revision in a filelog, uncompressed and delta-resolved.
209 /// The data for one revision in a filelog, uncompressed and delta-resolved.
204 pub struct FilelogRevisionData(Vec<u8>);
210 pub struct FilelogRevisionData(Vec<u8>);
205
211
206 impl FilelogRevisionData {
212 impl FilelogRevisionData {
207 /// Split into metadata and data
213 /// Split into metadata and data
208 pub fn split(&self) -> Result<(Option<&[u8]>, &[u8]), HgError> {
214 pub fn split(&self) -> Result<(Option<&[u8]>, &[u8]), HgError> {
209 const DELIMITER: &[u8; 2] = &[b'\x01', b'\n'];
215 const DELIMITER: &[u8; 2] = &[b'\x01', b'\n'];
210
216
211 if let Some(rest) = self.0.drop_prefix(DELIMITER) {
217 if let Some(rest) = self.0.drop_prefix(DELIMITER) {
212 if let Some((metadata, data)) = rest.split_2_by_slice(DELIMITER) {
218 if let Some((metadata, data)) = rest.split_2_by_slice(DELIMITER) {
213 Ok((Some(metadata), data))
219 Ok((Some(metadata), data))
214 } else {
220 } else {
215 Err(HgError::corrupted(
221 Err(HgError::corrupted(
216 "Missing metadata end delimiter in filelog entry",
222 "Missing metadata end delimiter in filelog entry",
217 ))
223 ))
218 }
224 }
219 } else {
225 } else {
220 Ok((None, &self.0))
226 Ok((None, &self.0))
221 }
227 }
222 }
228 }
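
When metadata is present, the framing that `split` decodes looks like this (`copy`/`copyrev` are Mercurial's conventional copy-metadata keys; the concrete values are invented):

    \x01\n
    copy: old/path\n
    copyrev: <40 hex characters>\n
    \x01\n
    <file contents>

A payload without a leading `\x01\n` is entirely file contents, which is the `Ok((None, &self.0))` branch.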
223
229
224 /// Returns the file contents at this revision, stripped of any metadata
230 /// Returns the file contents at this revision, stripped of any metadata
225 pub fn file_data(&self) -> Result<&[u8], HgError> {
231 pub fn file_data(&self) -> Result<&[u8], HgError> {
226 let (_metadata, data) = self.split()?;
232 let (_metadata, data) = self.split()?;
227 Ok(data)
233 Ok(data)
228 }
234 }
229
235
230 /// Consume the entry, and convert it into data, discarding any metadata,
236 /// Consume the entry, and convert it into data, discarding any metadata,
231 /// if present.
237 /// if present.
232 pub fn into_file_data(self) -> Result<Vec<u8>, HgError> {
238 pub fn into_file_data(self) -> Result<Vec<u8>, HgError> {
233 if let (Some(_metadata), data) = self.split()? {
239 if let (Some(_metadata), data) = self.split()? {
234 Ok(data.to_owned())
240 Ok(data.to_owned())
235 } else {
241 } else {
236 Ok(self.0)
242 Ok(self.0)
237 }
243 }
238 }
244 }
239 }
245 }
@@ -1,775 +1,770 @@
1 use std::fmt::Debug;
1 use std::fmt::Debug;
2 use std::ops::Deref;
2 use std::ops::Deref;
3
3
4 use byteorder::{BigEndian, ByteOrder};
4 use byteorder::{BigEndian, ByteOrder};
5 use bytes_cast::{unaligned, BytesCast};
5 use bytes_cast::{unaligned, BytesCast};
6
6
7 use super::REVIDX_KNOWN_FLAGS;
7 use super::REVIDX_KNOWN_FLAGS;
8 use crate::errors::HgError;
8 use crate::errors::HgError;
9 use crate::node::{NODE_BYTES_LENGTH, STORED_NODE_ID_BYTES};
9 use crate::node::{NODE_BYTES_LENGTH, STORED_NODE_ID_BYTES};
10 use crate::revlog::node::Node;
10 use crate::revlog::node::Node;
11 use crate::revlog::{Revision, NULL_REVISION};
11 use crate::revlog::{Revision, NULL_REVISION};
12 use crate::{Graph, GraphError, RevlogError, RevlogIndex, UncheckedRevision};
12 use crate::{Graph, GraphError, RevlogError, RevlogIndex, UncheckedRevision};
13
13
14 pub const INDEX_ENTRY_SIZE: usize = 64;
14 pub const INDEX_ENTRY_SIZE: usize = 64;
15 pub const COMPRESSION_MODE_INLINE: u8 = 2;
15 pub const COMPRESSION_MODE_INLINE: u8 = 2;
16
16
17 pub struct IndexHeader {
17 pub struct IndexHeader {
18 header_bytes: [u8; 4],
18 pub(super) header_bytes: [u8; 4],
19 }
19 }
20
20
21 #[derive(Copy, Clone)]
21 #[derive(Copy, Clone)]
22 pub struct IndexHeaderFlags {
22 pub struct IndexHeaderFlags {
23 flags: u16,
23 flags: u16,
24 }
24 }
25
25
26 /// Corresponds to the high bits of `_format_flags` in python
26 /// Corresponds to the high bits of `_format_flags` in python
27 impl IndexHeaderFlags {
27 impl IndexHeaderFlags {
28 /// Corresponds to FLAG_INLINE_DATA in python
28 /// Corresponds to FLAG_INLINE_DATA in python
29 pub fn is_inline(self) -> bool {
29 pub fn is_inline(self) -> bool {
30 self.flags & 1 != 0
30 self.flags & 1 != 0
31 }
31 }
32 /// Corresponds to FLAG_GENERALDELTA in python
32 /// Corresponds to FLAG_GENERALDELTA in python
33 pub fn uses_generaldelta(self) -> bool {
33 pub fn uses_generaldelta(self) -> bool {
34 self.flags & 2 != 0
34 self.flags & 2 != 0
35 }
35 }
36 }
36 }
37
37
38 /// Corresponds to the INDEX_HEADER structure,
38 /// Corresponds to the INDEX_HEADER structure,
39 /// which is parsed as a `header` variable in `_loadindex` in `revlog.py`
39 /// which is parsed as a `header` variable in `_loadindex` in `revlog.py`
40 impl IndexHeader {
40 impl IndexHeader {
41 fn format_flags(&self) -> IndexHeaderFlags {
41 fn format_flags(&self) -> IndexHeaderFlags {
42 // No "unknown flags" check here, unlike in python. Maybe there should
42 // No "unknown flags" check here, unlike in python. Maybe there should
43 // be.
43 // be.
44 IndexHeaderFlags {
44 IndexHeaderFlags {
45 flags: BigEndian::read_u16(&self.header_bytes[0..2]),
45 flags: BigEndian::read_u16(&self.header_bytes[0..2]),
46 }
46 }
47 }
47 }
48
48
49 /// The only revlog version currently supported by rhg.
49 /// The only revlog version currently supported by rhg.
50 const REVLOGV1: u16 = 1;
50 const REVLOGV1: u16 = 1;
51
51
52 /// Corresponds to `_format_version` in Python.
52 /// Corresponds to `_format_version` in Python.
53 fn format_version(&self) -> u16 {
53 fn format_version(&self) -> u16 {
54 BigEndian::read_u16(&self.header_bytes[2..4])
54 BigEndian::read_u16(&self.header_bytes[2..4])
55 }
55 }
56
56
57 const EMPTY_INDEX_HEADER: IndexHeader = IndexHeader {
57 pub fn parse(index_bytes: &[u8]) -> Result<Option<IndexHeader>, HgError> {
58 // We treat an empty file as a valid index with no entries.
59 // Here we make an arbitrary choice of what we assume the format of the
60 // index to be (V1, using generaldelta).
61 // This doesn't matter too much, since we're only doing read-only
62 // access, but the value corresponds to the `new_header` variable in
63 // `revlog.py`, `_loadindex`
64 header_bytes: [0, 3, 0, 1],
65 };
66
67 fn parse(index_bytes: &[u8]) -> Result<IndexHeader, HgError> {
68 if index_bytes.is_empty() {
58 if index_bytes.is_empty() {
69 return Ok(IndexHeader::EMPTY_INDEX_HEADER);
59 return Ok(None);
70 }
60 }
71 if index_bytes.len() < 4 {
61 if index_bytes.len() < 4 {
72 return Err(HgError::corrupted(
62 return Err(HgError::corrupted(
73 "corrupted revlog: can't read the index format header",
63 "corrupted revlog: can't read the index format header",
74 ));
64 ));
75 }
65 }
76 Ok(IndexHeader {
66 Ok(Some(IndexHeader {
77 header_bytes: {
67 header_bytes: {
78 let bytes: [u8; 4] =
68 let bytes: [u8; 4] =
79 index_bytes[0..4].try_into().expect("impossible");
69 index_bytes[0..4].try_into().expect("impossible");
80 bytes
70 bytes
81 },
71 },
82 })
72 }))
83 }
73 }
84 }
74 }
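
As a worked example of this parsing: the four bytes `[0, 3, 0, 1]`, which the removed `EMPTY_INDEX_HEADER` used to hard-code, decode big-endian as flags `0x0003` (so `is_inline()` and `uses_generaldelta()` both hold) and version `0x0001` (`REVLOGV1`). After this change an empty index no longer assumes that value: `parse` returns `Ok(None)` and `Index::new` below falls back to the caller-provided `default_header`.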
85
75
86 /// Abstracts the access to the index bytes since they can be spread between
76 /// Abstracts the access to the index bytes since they can be spread between
87 /// the immutable (bytes) part and the mutable (added) part if any appends
77 /// the immutable (bytes) part and the mutable (added) part if any appends
88 /// happened. This makes it transparent for the callers.
78 /// happened. This makes it transparent for the callers.
89 struct IndexData {
79 struct IndexData {
90 /// Immutable bytes, most likely taken from disk
80 /// Immutable bytes, most likely taken from disk
91 bytes: Box<dyn Deref<Target = [u8]> + Send>,
81 bytes: Box<dyn Deref<Target = [u8]> + Send>,
92 /// Bytes that were added after reading the index
82 /// Bytes that were added after reading the index
93 added: Vec<u8>,
83 added: Vec<u8>,
94 }
84 }
95
85
96 impl IndexData {
86 impl IndexData {
97 pub fn new(bytes: Box<dyn Deref<Target = [u8]> + Send>) -> Self {
87 pub fn new(bytes: Box<dyn Deref<Target = [u8]> + Send>) -> Self {
98 Self {
88 Self {
99 bytes,
89 bytes,
100 added: vec![],
90 added: vec![],
101 }
91 }
102 }
92 }
103
93
104 pub fn len(&self) -> usize {
94 pub fn len(&self) -> usize {
105 self.bytes.len() + self.added.len()
95 self.bytes.len() + self.added.len()
106 }
96 }
107 }
97 }
108
98
109 impl std::ops::Index<std::ops::Range<usize>> for IndexData {
99 impl std::ops::Index<std::ops::Range<usize>> for IndexData {
110 type Output = [u8];
100 type Output = [u8];
111
101
112 fn index(&self, index: std::ops::Range<usize>) -> &Self::Output {
102 fn index(&self, index: std::ops::Range<usize>) -> &Self::Output {
113 let start = index.start;
103 let start = index.start;
114 let end = index.end;
104 let end = index.end;
115 let immutable_len = self.bytes.len();
105 let immutable_len = self.bytes.len();
116 if start < immutable_len {
106 if start < immutable_len {
117 if end > immutable_len {
107 if end > immutable_len {
118 panic!("index data cannot span existing and added ranges");
108 panic!("index data cannot span existing and added ranges");
119 }
109 }
120 &self.bytes[index]
110 &self.bytes[index]
121 } else {
111 } else {
122 &self.added[start - immutable_len..end - immutable_len]
112 &self.added[start - immutable_len..end - immutable_len]
123 }
113 }
124 }
114 }
125 }
115 }
126
116
127 pub struct RevisionDataParams {
117 pub struct RevisionDataParams {
128 flags: u16,
118 flags: u16,
129 data_offset: u64,
119 data_offset: u64,
130 data_compressed_length: i32,
120 data_compressed_length: i32,
131 data_uncompressed_length: i32,
121 data_uncompressed_length: i32,
132 data_delta_base: i32,
122 data_delta_base: i32,
133 link_rev: i32,
123 link_rev: i32,
134 parent_rev_1: i32,
124 parent_rev_1: i32,
135 parent_rev_2: i32,
125 parent_rev_2: i32,
136 node_id: [u8; NODE_BYTES_LENGTH],
126 node_id: [u8; NODE_BYTES_LENGTH],
137 _sidedata_offset: u64,
127 _sidedata_offset: u64,
138 _sidedata_compressed_length: i32,
128 _sidedata_compressed_length: i32,
139 data_compression_mode: u8,
129 data_compression_mode: u8,
140 _sidedata_compression_mode: u8,
130 _sidedata_compression_mode: u8,
141 _rank: i32,
131 _rank: i32,
142 }
132 }
143
133
144 #[derive(BytesCast)]
134 #[derive(BytesCast)]
145 #[repr(C)]
135 #[repr(C)]
146 pub struct RevisionDataV1 {
136 pub struct RevisionDataV1 {
147 data_offset_or_flags: unaligned::U64Be,
137 data_offset_or_flags: unaligned::U64Be,
148 data_compressed_length: unaligned::I32Be,
138 data_compressed_length: unaligned::I32Be,
149 data_uncompressed_length: unaligned::I32Be,
139 data_uncompressed_length: unaligned::I32Be,
150 data_delta_base: unaligned::I32Be,
140 data_delta_base: unaligned::I32Be,
151 link_rev: unaligned::I32Be,
141 link_rev: unaligned::I32Be,
152 parent_rev_1: unaligned::I32Be,
142 parent_rev_1: unaligned::I32Be,
153 parent_rev_2: unaligned::I32Be,
143 parent_rev_2: unaligned::I32Be,
154 node_id: [u8; STORED_NODE_ID_BYTES],
144 node_id: [u8; STORED_NODE_ID_BYTES],
155 }
145 }
156
146
157 fn _static_assert_size_of_revision_data_v1() {
147 fn _static_assert_size_of_revision_data_v1() {
158 let _ = std::mem::transmute::<RevisionDataV1, [u8; 64]>;
148 let _ = std::mem::transmute::<RevisionDataV1, [u8; 64]>;
159 }
149 }
160
150
161 impl RevisionDataParams {
151 impl RevisionDataParams {
162 pub fn validate(&self) -> Result<(), RevlogError> {
152 pub fn validate(&self) -> Result<(), RevlogError> {
163 if self.flags & !REVIDX_KNOWN_FLAGS != 0 {
153 if self.flags & !REVIDX_KNOWN_FLAGS != 0 {
164 return Err(RevlogError::corrupted(format!(
154 return Err(RevlogError::corrupted(format!(
165 "unknown revlog index flags: {}",
155 "unknown revlog index flags: {}",
166 self.flags
156 self.flags
167 )));
157 )));
168 }
158 }
169 if self.data_compression_mode != COMPRESSION_MODE_INLINE {
159 if self.data_compression_mode != COMPRESSION_MODE_INLINE {
170 return Err(RevlogError::corrupted(format!(
160 return Err(RevlogError::corrupted(format!(
171 "invalid data compression mode: {}",
161 "invalid data compression mode: {}",
172 self.data_compression_mode
162 self.data_compression_mode
173 )));
163 )));
174 }
164 }
175 // FIXME isn't this only for v2 or changelog v2?
165 // FIXME isn't this only for v2 or changelog v2?
176 if self._sidedata_compression_mode != COMPRESSION_MODE_INLINE {
166 if self._sidedata_compression_mode != COMPRESSION_MODE_INLINE {
177 return Err(RevlogError::corrupted(format!(
167 return Err(RevlogError::corrupted(format!(
178 "invalid sidedata compression mode: {}",
168 "invalid sidedata compression mode: {}",
179 self._sidedata_compression_mode
169 self._sidedata_compression_mode
180 )));
170 )));
181 }
171 }
182 Ok(())
172 Ok(())
183 }
173 }
184
174
185 pub fn into_v1(self) -> RevisionDataV1 {
175 pub fn into_v1(self) -> RevisionDataV1 {
186 let data_offset_or_flags = self.data_offset << 16 | self.flags as u64;
176 let data_offset_or_flags = self.data_offset << 16 | self.flags as u64;
187 let mut node_id = [0; STORED_NODE_ID_BYTES];
177 let mut node_id = [0; STORED_NODE_ID_BYTES];
188 node_id[..NODE_BYTES_LENGTH].copy_from_slice(&self.node_id);
178 node_id[..NODE_BYTES_LENGTH].copy_from_slice(&self.node_id);
189 RevisionDataV1 {
179 RevisionDataV1 {
190 data_offset_or_flags: data_offset_or_flags.into(),
180 data_offset_or_flags: data_offset_or_flags.into(),
191 data_compressed_length: self.data_compressed_length.into(),
181 data_compressed_length: self.data_compressed_length.into(),
192 data_uncompressed_length: self.data_uncompressed_length.into(),
182 data_uncompressed_length: self.data_uncompressed_length.into(),
193 data_delta_base: self.data_delta_base.into(),
183 data_delta_base: self.data_delta_base.into(),
194 link_rev: self.link_rev.into(),
184 link_rev: self.link_rev.into(),
195 parent_rev_1: self.parent_rev_1.into(),
185 parent_rev_1: self.parent_rev_1.into(),
196 parent_rev_2: self.parent_rev_2.into(),
186 parent_rev_2: self.parent_rev_2.into(),
197 node_id,
187 node_id,
198 }
188 }
199 }
189 }
200 }
190 }
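
Packing example for `into_v1` (values invented): with `data_offset = 1` and `flags = 0x0002`, `data_offset_or_flags = (1 << 16) | 0x0002 = 0x0001_0002`. The high 48 bits carry the offset and the low 16 bits the flags, matching the on-disk v1 layout that `IndexEntry::offset` and `IndexEntry::flags` read back below.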
201
191
202 /// A Revlog index
192 /// A Revlog index
203 pub struct Index {
193 pub struct Index {
204 bytes: IndexData,
194 bytes: IndexData,
205 /// Offsets of starts of index blocks.
195 /// Offsets of starts of index blocks.
206 /// Only needed when the index is interleaved with data.
196 /// Only needed when the index is interleaved with data.
207 offsets: Option<Vec<usize>>,
197 offsets: Option<Vec<usize>>,
208 uses_generaldelta: bool,
198 uses_generaldelta: bool,
209 }
199 }
210
200
211 impl Debug for Index {
201 impl Debug for Index {
212 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
202 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
213 f.debug_struct("Index")
203 f.debug_struct("Index")
214 .field("offsets", &self.offsets)
204 .field("offsets", &self.offsets)
215 .field("uses_generaldelta", &self.uses_generaldelta)
205 .field("uses_generaldelta", &self.uses_generaldelta)
216 .finish()
206 .finish()
217 }
207 }
218 }
208 }
219
209
220 impl Graph for Index {
210 impl Graph for Index {
221 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
211 fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
222 let err = || GraphError::ParentOutOfRange(rev);
212 let err = || GraphError::ParentOutOfRange(rev);
223 match self.get_entry(rev) {
213 match self.get_entry(rev) {
224 Some(entry) => {
214 Some(entry) => {
225 // The C implementation checks that the parents are valid
215 // The C implementation checks that the parents are valid
226 // before returning
216 // before returning
227 Ok([
217 Ok([
228 self.check_revision(entry.p1()).ok_or_else(err)?,
218 self.check_revision(entry.p1()).ok_or_else(err)?,
229 self.check_revision(entry.p2()).ok_or_else(err)?,
219 self.check_revision(entry.p2()).ok_or_else(err)?,
230 ])
220 ])
231 }
221 }
232 None => Ok([NULL_REVISION, NULL_REVISION]),
222 None => Ok([NULL_REVISION, NULL_REVISION]),
233 }
223 }
234 }
224 }
235 }
225 }
236
226
237 impl Index {
227 impl Index {
238 /// Create an index from bytes.
228 /// Create an index from bytes.
239 /// Calculate the start of each entry when is_inline is true.
229 /// Calculate the start of each entry when is_inline is true.
240 pub fn new(
230 pub fn new(
241 bytes: Box<dyn Deref<Target = [u8]> + Send>,
231 bytes: Box<dyn Deref<Target = [u8]> + Send>,
232 default_header: IndexHeader,
242 ) -> Result<Self, HgError> {
233 ) -> Result<Self, HgError> {
243 let header = IndexHeader::parse(bytes.as_ref())?;
234 let header =
235 IndexHeader::parse(bytes.as_ref())?.unwrap_or(default_header);
244
236
245 if header.format_version() != IndexHeader::REVLOGV1 {
237 if header.format_version() != IndexHeader::REVLOGV1 {
246 // A proper new version should have had a repo/store
238 // A proper new version should have had a repo/store
247 // requirement.
239 // requirement.
248 return Err(HgError::corrupted("unsupported revlog version"));
240 return Err(HgError::corrupted("unsupported revlog version"));
249 }
241 }
250
242
251 // This is only correct because we know version is REVLOGV1.
243 // This is only correct because we know version is REVLOGV1.
252 // In v2 we always use generaldelta, while in v0 we never use
244 // In v2 we always use generaldelta, while in v0 we never use
253 // generaldelta. Similar for [is_inline] (it's only used in v1).
245 // generaldelta. Similar for [is_inline] (it's only used in v1).
254 let uses_generaldelta = header.format_flags().uses_generaldelta();
246 let uses_generaldelta = header.format_flags().uses_generaldelta();
255
247
256 if header.format_flags().is_inline() {
248 if header.format_flags().is_inline() {
257 let mut offset: usize = 0;
249 let mut offset: usize = 0;
258 let mut offsets = Vec::new();
250 let mut offsets = Vec::new();
259
251
260 while offset + INDEX_ENTRY_SIZE <= bytes.len() {
252 while offset + INDEX_ENTRY_SIZE <= bytes.len() {
261 offsets.push(offset);
253 offsets.push(offset);
262 let end = offset + INDEX_ENTRY_SIZE;
254 let end = offset + INDEX_ENTRY_SIZE;
263 let entry = IndexEntry {
255 let entry = IndexEntry {
264 bytes: &bytes[offset..end],
256 bytes: &bytes[offset..end],
265 offset_override: None,
257 offset_override: None,
266 };
258 };
267
259
268 offset += INDEX_ENTRY_SIZE + entry.compressed_len() as usize;
260 offset += INDEX_ENTRY_SIZE + entry.compressed_len() as usize;
269 }
261 }
270
262
271 if offset == bytes.len() {
263 if offset == bytes.len() {
272 Ok(Self {
264 Ok(Self {
273 bytes: IndexData::new(bytes),
265 bytes: IndexData::new(bytes),
274 offsets: Some(offsets),
266 offsets: Some(offsets),
275 uses_generaldelta,
267 uses_generaldelta,
276 })
268 })
277 } else {
269 } else {
278 Err(HgError::corrupted("unexpected inline revlog length"))
270 Err(HgError::corrupted("unexpected inline revlog length"))
279 }
271 }
280 } else {
272 } else {
281 Ok(Self {
273 Ok(Self {
282 bytes: IndexData::new(bytes),
274 bytes: IndexData::new(bytes),
283 offsets: None,
275 offsets: None,
284 uses_generaldelta,
276 uses_generaldelta,
285 })
277 })
286 }
278 }
287 }
279 }
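
A hypothetical inline layout, to illustrate the offset computation (sizes invented):

    entry 0: index bytes [0, 64),   compressed_len = 10 -> data [64, 74)
    entry 1: index bytes [74, 138), compressed_len = 5  -> data [138, 143)

Here `offsets == [0, 74]` and the loop ends exactly at `bytes.len() == 143`; any leftover bytes would trigger the "unexpected inline revlog length" error.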
288
280
289 pub fn uses_generaldelta(&self) -> bool {
281 pub fn uses_generaldelta(&self) -> bool {
290 self.uses_generaldelta
282 self.uses_generaldelta
291 }
283 }
292
284
293 /// Value of the inline flag.
285 /// Value of the inline flag.
294 pub fn is_inline(&self) -> bool {
286 pub fn is_inline(&self) -> bool {
295 self.offsets.is_some()
287 self.offsets.is_some()
296 }
288 }
297
289
298 /// Return a slice of bytes if `revlog` is inline. Panic if not.
290 /// Return a slice of bytes if `revlog` is inline. Panic if not.
299 pub fn data(&self, start: usize, end: usize) -> &[u8] {
291 pub fn data(&self, start: usize, end: usize) -> &[u8] {
300 if !self.is_inline() {
292 if !self.is_inline() {
301 panic!("tried to access data in the index of a revlog that is not inline");
293 panic!("tried to access data in the index of a revlog that is not inline");
302 }
294 }
303 &self.bytes[start..end]
295 &self.bytes[start..end]
304 }
296 }
305
297
306 /// Return the number of entries in the revlog index.
298 /// Return the number of entries in the revlog index.
307 pub fn len(&self) -> usize {
299 pub fn len(&self) -> usize {
308 if let Some(offsets) = &self.offsets {
300 if let Some(offsets) = &self.offsets {
309 offsets.len()
301 offsets.len()
310 } else {
302 } else {
311 self.bytes.len() / INDEX_ENTRY_SIZE
303 self.bytes.len() / INDEX_ENTRY_SIZE
312 }
304 }
313 }
305 }
314
306
315 /// Returns `true` if the `Index` has zero `entries`.
307 /// Returns `true` if the `Index` has zero `entries`.
316 pub fn is_empty(&self) -> bool {
308 pub fn is_empty(&self) -> bool {
317 self.len() == 0
309 self.len() == 0
318 }
310 }
319
311
320 /// Return the index entry corresponding to the given revision if it
312 /// Return the index entry corresponding to the given revision if it
321 /// exists.
313 /// exists.
322 pub fn get_entry(&self, rev: Revision) -> Option<IndexEntry> {
314 pub fn get_entry(&self, rev: Revision) -> Option<IndexEntry> {
323 if rev == NULL_REVISION {
315 if rev == NULL_REVISION {
324 return None;
316 return None;
325 }
317 }
326 Some(if let Some(offsets) = &self.offsets {
318 Some(if let Some(offsets) = &self.offsets {
327 self.get_entry_inline(rev, offsets)
319 self.get_entry_inline(rev, offsets)
328 } else {
320 } else {
329 self.get_entry_separated(rev)
321 self.get_entry_separated(rev)
330 })
322 })
331 }
323 }
332
324
333 fn get_entry_inline(
325 fn get_entry_inline(
334 &self,
326 &self,
335 rev: Revision,
327 rev: Revision,
336 offsets: &[usize],
328 offsets: &[usize],
337 ) -> IndexEntry {
329 ) -> IndexEntry {
338 let start = offsets[rev.0 as usize];
330 let start = offsets[rev.0 as usize];
339 let end = start + INDEX_ENTRY_SIZE;
331 let end = start + INDEX_ENTRY_SIZE;
340 let bytes = &self.bytes[start..end];
332 let bytes = &self.bytes[start..end];
341
333
342 // See IndexEntry for an explanation of this override.
334 // See IndexEntry for an explanation of this override.
343 let offset_override = Some(end);
335 let offset_override = Some(end);
344
336
345 IndexEntry {
337 IndexEntry {
346 bytes,
338 bytes,
347 offset_override,
339 offset_override,
348 }
340 }
349 }
341 }
350
342
351 fn get_entry_separated(&self, rev: Revision) -> IndexEntry {
343 fn get_entry_separated(&self, rev: Revision) -> IndexEntry {
352 let start = rev.0 as usize * INDEX_ENTRY_SIZE;
344 let start = rev.0 as usize * INDEX_ENTRY_SIZE;
353 let end = start + INDEX_ENTRY_SIZE;
345 let end = start + INDEX_ENTRY_SIZE;
354 let bytes = &self.bytes[start..end];
346 let bytes = &self.bytes[start..end];
355
347
356 // Override the offset of the first revision as its bytes are used
348 // Override the offset of the first revision as its bytes are used
357 // for the index's metadata (saving space because it is always 0)
349 // for the index's metadata (saving space because it is always 0)
358 let offset_override = if rev == Revision(0) { Some(0) } else { None };
350 let offset_override = if rev == Revision(0) { Some(0) } else { None };
359
351
360 IndexEntry {
352 IndexEntry {
361 bytes,
353 bytes,
362 offset_override,
354 offset_override,
363 }
355 }
364 }
356 }
365
357
366 /// TODO move this to the trait probably, along with other things
358 /// TODO move this to the trait probably, along with other things
367 pub fn append(
359 pub fn append(
368 &mut self,
360 &mut self,
369 revision_data: RevisionDataParams,
361 revision_data: RevisionDataParams,
370 ) -> Result<(), RevlogError> {
362 ) -> Result<(), RevlogError> {
371 revision_data.validate()?;
363 revision_data.validate()?;
372 let new_offset = self.bytes.len();
364 let new_offset = self.bytes.len();
373 if let Some(offsets) = self.offsets.as_mut() {
365 if let Some(offsets) = self.offsets.as_mut() {
374 offsets.push(new_offset)
366 offsets.push(new_offset)
375 }
367 }
376 self.bytes.added.extend(revision_data.into_v1().as_bytes());
368 self.bytes.added.extend(revision_data.into_v1().as_bytes());
377 Ok(())
369 Ok(())
378 }
370 }
379 }
371 }
380
372
381 impl super::RevlogIndex for Index {
373 impl super::RevlogIndex for Index {
382 fn len(&self) -> usize {
374 fn len(&self) -> usize {
383 self.len()
375 self.len()
384 }
376 }
385
377
386 fn node(&self, rev: Revision) -> Option<&Node> {
378 fn node(&self, rev: Revision) -> Option<&Node> {
387 self.get_entry(rev).map(|entry| entry.hash())
379 self.get_entry(rev).map(|entry| entry.hash())
388 }
380 }
389 }
381 }
390
382
391 #[derive(Debug)]
383 #[derive(Debug)]
392 pub struct IndexEntry<'a> {
384 pub struct IndexEntry<'a> {
393 bytes: &'a [u8],
385 bytes: &'a [u8],
394 /// Allows overriding the offset value of the entry.
386 /// Allows overriding the offset value of the entry.
395 ///
387 ///
396 /// For interleaved index and data, the offset stored in the index
388 /// For interleaved index and data, the offset stored in the index
397 /// corresponds to the separated data offset.
389 /// corresponds to the separated data offset.
398 /// It has to be overridden with the actual offset in the interleaved
390 /// It has to be overridden with the actual offset in the interleaved
399 /// index which is just after the index block.
391 /// index which is just after the index block.
400 ///
392 ///
401 /// For separated index and data, the offset stored in the first index
393 /// For separated index and data, the offset stored in the first index
402 /// entry is mixed with the index headers.
394 /// entry is mixed with the index headers.
403 /// It has to be overridden with 0.
395 /// It has to be overridden with 0.
404 offset_override: Option<usize>,
396 offset_override: Option<usize>,
405 }
397 }
406
398
407 impl<'a> IndexEntry<'a> {
399 impl<'a> IndexEntry<'a> {
408 /// Return the offset of the data.
400 /// Return the offset of the data.
409 pub fn offset(&self) -> usize {
401 pub fn offset(&self) -> usize {
410 if let Some(offset_override) = self.offset_override {
402 if let Some(offset_override) = self.offset_override {
411 offset_override
403 offset_override
412 } else {
404 } else {
413 let mut bytes = [0; 8];
405 let mut bytes = [0; 8];
414 bytes[2..8].copy_from_slice(&self.bytes[0..=5]);
406 bytes[2..8].copy_from_slice(&self.bytes[0..=5]);
415 BigEndian::read_u64(&bytes[..]) as usize
407 BigEndian::read_u64(&bytes[..]) as usize
416 }
408 }
417 }
409 }
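
For instance, the six stored offset bytes `[0, 0, 0, 0, 1, 0]` are zero-extended to eight and read big-endian as `0x100`, i.e. 256. The override matters because, as the field documentation above notes, the stored value is either mixed with the index header (first entry of a separated index) or refers to where the data would live in a separated data file (inline case).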
418
410
419 pub fn flags(&self) -> u16 {
411 pub fn flags(&self) -> u16 {
420 BigEndian::read_u16(&self.bytes[6..=7])
412 BigEndian::read_u16(&self.bytes[6..=7])
421 }
413 }
422
414
423 /// Return the compressed length of the data.
415 /// Return the compressed length of the data.
424 pub fn compressed_len(&self) -> u32 {
416 pub fn compressed_len(&self) -> u32 {
425 BigEndian::read_u32(&self.bytes[8..=11])
417 BigEndian::read_u32(&self.bytes[8..=11])
426 }
418 }
427
419
428 /// Return the uncompressed length of the data.
420 /// Return the uncompressed length of the data.
429 pub fn uncompressed_len(&self) -> i32 {
421 pub fn uncompressed_len(&self) -> i32 {
430 BigEndian::read_i32(&self.bytes[12..=15])
422 BigEndian::read_i32(&self.bytes[12..=15])
431 }
423 }
432
424
433 /// Return the revision from which the data has been derived.
425 /// Return the revision from which the data has been derived.
434 pub fn base_revision_or_base_of_delta_chain(&self) -> UncheckedRevision {
426 pub fn base_revision_or_base_of_delta_chain(&self) -> UncheckedRevision {
435 // TODO Maybe return an Option when base_revision == rev?
427 // TODO Maybe return an Option when base_revision == rev?
436 // Requires to add rev to IndexEntry
428 // Requires to add rev to IndexEntry
437
429
438 BigEndian::read_i32(&self.bytes[16..]).into()
430 BigEndian::read_i32(&self.bytes[16..]).into()
439 }
431 }
440
432
441 pub fn link_revision(&self) -> UncheckedRevision {
433 pub fn link_revision(&self) -> UncheckedRevision {
442 BigEndian::read_i32(&self.bytes[20..]).into()
434 BigEndian::read_i32(&self.bytes[20..]).into()
443 }
435 }
444
436
445 pub fn p1(&self) -> UncheckedRevision {
437 pub fn p1(&self) -> UncheckedRevision {
446 BigEndian::read_i32(&self.bytes[24..]).into()
438 BigEndian::read_i32(&self.bytes[24..]).into()
447 }
439 }
448
440
449 pub fn p2(&self) -> UncheckedRevision {
441 pub fn p2(&self) -> UncheckedRevision {
450 BigEndian::read_i32(&self.bytes[28..]).into()
442 BigEndian::read_i32(&self.bytes[28..]).into()
451 }
443 }
452
444
453 /// Return the hash of the revision's full text.
445 /// Return the hash of the revision's full text.
454 ///
446 ///
455 /// Currently, SHA-1 is used and only the first 20 bytes of this field
447 /// Currently, SHA-1 is used and only the first 20 bytes of this field
456 /// are used.
448 /// are used.
457 pub fn hash(&self) -> &'a Node {
449 pub fn hash(&self) -> &'a Node {
458 (&self.bytes[32..52]).try_into().unwrap()
450 (&self.bytes[32..52]).try_into().unwrap()
459 }
451 }
460 }
452 }
461
453
462 #[cfg(test)]
454 #[cfg(test)]
463 mod tests {
455 mod tests {
464 use super::*;
456 use super::*;
465 use crate::node::NULL_NODE;
457 use crate::node::NULL_NODE;
466
458
467 #[cfg(test)]
459 #[cfg(test)]
468 #[derive(Debug, Copy, Clone)]
460 #[derive(Debug, Copy, Clone)]
469 pub struct IndexEntryBuilder {
461 pub struct IndexEntryBuilder {
470 is_first: bool,
462 is_first: bool,
471 is_inline: bool,
463 is_inline: bool,
472 is_general_delta: bool,
464 is_general_delta: bool,
473 version: u16,
465 version: u16,
474 offset: usize,
466 offset: usize,
475 compressed_len: usize,
467 compressed_len: usize,
476 uncompressed_len: usize,
468 uncompressed_len: usize,
477 base_revision_or_base_of_delta_chain: Revision,
469 base_revision_or_base_of_delta_chain: Revision,
478 link_revision: Revision,
470 link_revision: Revision,
479 p1: Revision,
471 p1: Revision,
480 p2: Revision,
472 p2: Revision,
481 node: Node,
473 node: Node,
482 }
474 }
483
475
484 #[cfg(test)]
476 #[cfg(test)]
485 impl IndexEntryBuilder {
477 impl IndexEntryBuilder {
486 #[allow(clippy::new_without_default)]
478 #[allow(clippy::new_without_default)]
487 pub fn new() -> Self {
479 pub fn new() -> Self {
488 Self {
480 Self {
489 is_first: false,
481 is_first: false,
490 is_inline: false,
482 is_inline: false,
491 is_general_delta: true,
483 is_general_delta: true,
492 version: 1,
484 version: 1,
493 offset: 0,
485 offset: 0,
494 compressed_len: 0,
486 compressed_len: 0,
495 uncompressed_len: 0,
487 uncompressed_len: 0,
496 base_revision_or_base_of_delta_chain: Revision(0),
488 base_revision_or_base_of_delta_chain: Revision(0),
497 link_revision: Revision(0),
489 link_revision: Revision(0),
498 p1: NULL_REVISION,
490 p1: NULL_REVISION,
499 p2: NULL_REVISION,
491 p2: NULL_REVISION,
500 node: NULL_NODE,
492 node: NULL_NODE,
501 }
493 }
502 }
494 }
503
495
504 pub fn is_first(&mut self, value: bool) -> &mut Self {
496 pub fn is_first(&mut self, value: bool) -> &mut Self {
505 self.is_first = value;
497 self.is_first = value;
506 self
498 self
507 }
499 }
508
500
509 pub fn with_inline(&mut self, value: bool) -> &mut Self {
501 pub fn with_inline(&mut self, value: bool) -> &mut Self {
510 self.is_inline = value;
502 self.is_inline = value;
511 self
503 self
512 }
504 }
513
505
514 pub fn with_general_delta(&mut self, value: bool) -> &mut Self {
506 pub fn with_general_delta(&mut self, value: bool) -> &mut Self {
515 self.is_general_delta = value;
507 self.is_general_delta = value;
516 self
508 self
517 }
509 }
518
510
519 pub fn with_version(&mut self, value: u16) -> &mut Self {
511 pub fn with_version(&mut self, value: u16) -> &mut Self {
520 self.version = value;
512 self.version = value;
521 self
513 self
522 }
514 }
523
515
524 pub fn with_offset(&mut self, value: usize) -> &mut Self {
516 pub fn with_offset(&mut self, value: usize) -> &mut Self {
525 self.offset = value;
517 self.offset = value;
526 self
518 self
527 }
519 }
528
520
529 pub fn with_compressed_len(&mut self, value: usize) -> &mut Self {
521 pub fn with_compressed_len(&mut self, value: usize) -> &mut Self {
530 self.compressed_len = value;
522 self.compressed_len = value;
531 self
523 self
532 }
524 }
533
525
534 pub fn with_uncompressed_len(&mut self, value: usize) -> &mut Self {
526 pub fn with_uncompressed_len(&mut self, value: usize) -> &mut Self {
535 self.uncompressed_len = value;
527 self.uncompressed_len = value;
536 self
528 self
537 }
529 }
538
530
539 pub fn with_base_revision_or_base_of_delta_chain(
531 pub fn with_base_revision_or_base_of_delta_chain(
540 &mut self,
532 &mut self,
541 value: Revision,
533 value: Revision,
542 ) -> &mut Self {
534 ) -> &mut Self {
543 self.base_revision_or_base_of_delta_chain = value;
535 self.base_revision_or_base_of_delta_chain = value;
544 self
536 self
545 }
537 }
546
538
547 pub fn with_link_revision(&mut self, value: Revision) -> &mut Self {
539 pub fn with_link_revision(&mut self, value: Revision) -> &mut Self {
548 self.link_revision = value;
540 self.link_revision = value;
549 self
541 self
550 }
542 }
551
543
552 pub fn with_p1(&mut self, value: Revision) -> &mut Self {
544 pub fn with_p1(&mut self, value: Revision) -> &mut Self {
553 self.p1 = value;
545 self.p1 = value;
554 self
546 self
555 }
547 }
556
548
557 pub fn with_p2(&mut self, value: Revision) -> &mut Self {
549 pub fn with_p2(&mut self, value: Revision) -> &mut Self {
558 self.p2 = value;
550 self.p2 = value;
559 self
551 self
560 }
552 }
561
553
562 pub fn with_node(&mut self, value: Node) -> &mut Self {
554 pub fn with_node(&mut self, value: Node) -> &mut Self {
563 self.node = value;
555 self.node = value;
564 self
556 self
565 }
557 }
566
558
567 pub fn build(&self) -> Vec<u8> {
559 pub fn build(&self) -> Vec<u8> {
568 let mut bytes = Vec::with_capacity(INDEX_ENTRY_SIZE);
560 let mut bytes = Vec::with_capacity(INDEX_ENTRY_SIZE);
569 if self.is_first {
561 if self.is_first {
570 bytes.extend(&match (self.is_general_delta, self.is_inline) {
562 bytes.extend(&match (self.is_general_delta, self.is_inline) {
571 (false, false) => [0u8, 0],
563 (false, false) => [0u8, 0],
572 (false, true) => [0u8, 1],
564 (false, true) => [0u8, 1],
573 (true, false) => [0u8, 2],
565 (true, false) => [0u8, 2],
574 (true, true) => [0u8, 3],
566 (true, true) => [0u8, 3],
575 });
567 });
576 bytes.extend(&self.version.to_be_bytes());
568 bytes.extend(&self.version.to_be_bytes());
577 // Remaining offset bytes.
569 // Remaining offset bytes.
578 bytes.extend(&[0u8; 2]);
570 bytes.extend(&[0u8; 2]);
579 } else {
571 } else {
580 // Offset stored on 48 bits (6 bytes)
572 // Offset stored on 48 bits (6 bytes)
581 bytes.extend(&(self.offset as u64).to_be_bytes()[2..]);
573 bytes.extend(&(self.offset as u64).to_be_bytes()[2..]);
582 }
574 }
583 bytes.extend(&[0u8; 2]); // Revision flags.
575 bytes.extend(&[0u8; 2]); // Revision flags.
584 bytes.extend(&(self.compressed_len as u32).to_be_bytes());
576 bytes.extend(&(self.compressed_len as u32).to_be_bytes());
585 bytes.extend(&(self.uncompressed_len as u32).to_be_bytes());
577 bytes.extend(&(self.uncompressed_len as u32).to_be_bytes());
586 bytes.extend(
578 bytes.extend(
587 &self.base_revision_or_base_of_delta_chain.0.to_be_bytes(),
579 &self.base_revision_or_base_of_delta_chain.0.to_be_bytes(),
588 );
580 );
589 bytes.extend(&self.link_revision.0.to_be_bytes());
581 bytes.extend(&self.link_revision.0.to_be_bytes());
590 bytes.extend(&self.p1.0.to_be_bytes());
582 bytes.extend(&self.p1.0.to_be_bytes());
591 bytes.extend(&self.p2.0.to_be_bytes());
583 bytes.extend(&self.p2.0.to_be_bytes());
592 bytes.extend(self.node.as_bytes());
584 bytes.extend(self.node.as_bytes());
593 bytes.extend(vec![0u8; 12]);
585 bytes.extend(vec![0u8; 12]);
594 bytes
586 bytes
595 }
587 }
596 }
588 }
597
589
598 pub fn is_inline(index_bytes: &[u8]) -> bool {
590 pub fn is_inline(index_bytes: &[u8]) -> bool {
599 IndexHeader::parse(index_bytes)
591 IndexHeader::parse(index_bytes)
600 .expect("too short")
592 .expect("too short")
593 .unwrap()
601 .format_flags()
594 .format_flags()
602 .is_inline()
595 .is_inline()
603 }
596 }
604
597
605 pub fn uses_generaldelta(index_bytes: &[u8]) -> bool {
598 pub fn uses_generaldelta(index_bytes: &[u8]) -> bool {
606 IndexHeader::parse(index_bytes)
599 IndexHeader::parse(index_bytes)
607 .expect("too short")
600 .expect("too short")
601 .unwrap()
608 .format_flags()
602 .format_flags()
609 .uses_generaldelta()
603 .uses_generaldelta()
610 }
604 }
611
605
612 pub fn get_version(index_bytes: &[u8]) -> u16 {
606 pub fn get_version(index_bytes: &[u8]) -> u16 {
613 IndexHeader::parse(index_bytes)
607 IndexHeader::parse(index_bytes)
614 .expect("too short")
608 .expect("too short")
609 .unwrap()
615 .format_version()
610 .format_version()
616 }
611 }
617
612
618 #[test]
613 #[test]
619 fn flags_when_no_inline_flag_test() {
614 fn flags_when_no_inline_flag_test() {
620 let bytes = IndexEntryBuilder::new()
615 let bytes = IndexEntryBuilder::new()
621 .is_first(true)
616 .is_first(true)
622 .with_general_delta(false)
617 .with_general_delta(false)
623 .with_inline(false)
618 .with_inline(false)
624 .build();
619 .build();
625
620
626 assert!(!is_inline(&bytes));
621 assert!(!is_inline(&bytes));
627 assert!(!uses_generaldelta(&bytes));
622 assert!(!uses_generaldelta(&bytes));
628 }
623 }
629
624
630 #[test]
625 #[test]
631 fn flags_when_inline_flag_test() {
626 fn flags_when_inline_flag_test() {
632 let bytes = IndexEntryBuilder::new()
627 let bytes = IndexEntryBuilder::new()
633 .is_first(true)
628 .is_first(true)
634 .with_general_delta(false)
629 .with_general_delta(false)
635 .with_inline(true)
630 .with_inline(true)
636 .build();
631 .build();
637
632
638 assert!(is_inline(&bytes));
633 assert!(is_inline(&bytes));
639 assert!(!uses_generaldelta(&bytes));
634 assert!(!uses_generaldelta(&bytes));
640 }
635 }
641
636
642 #[test]
637 #[test]
643 fn flags_when_inline_and_generaldelta_flags_test() {
638 fn flags_when_inline_and_generaldelta_flags_test() {
644 let bytes = IndexEntryBuilder::new()
639 let bytes = IndexEntryBuilder::new()
645 .is_first(true)
640 .is_first(true)
646 .with_general_delta(true)
641 .with_general_delta(true)
647 .with_inline(true)
642 .with_inline(true)
648 .build();
643 .build();
649
644
650 assert!(is_inline(&bytes));
645 assert!(is_inline(&bytes));
651 assert!(uses_generaldelta(&bytes));
646 assert!(uses_generaldelta(&bytes));
652 }
647 }
653
648
654 #[test]
649 #[test]
655 fn test_offset() {
650 fn test_offset() {
656 let bytes = IndexEntryBuilder::new().with_offset(1).build();
651 let bytes = IndexEntryBuilder::new().with_offset(1).build();
657 let entry = IndexEntry {
652 let entry = IndexEntry {
658 bytes: &bytes,
653 bytes: &bytes,
659 offset_override: None,
654 offset_override: None,
660 };
655 };
661
656
662 assert_eq!(entry.offset(), 1)
657 assert_eq!(entry.offset(), 1)
663 }
658 }
664
659
665 #[test]
660 #[test]
666 fn test_with_overridden_offset() {
661 fn test_with_overridden_offset() {
667 let bytes = IndexEntryBuilder::new().with_offset(1).build();
662 let bytes = IndexEntryBuilder::new().with_offset(1).build();
668 let entry = IndexEntry {
663 let entry = IndexEntry {
669 bytes: &bytes,
664 bytes: &bytes,
670 offset_override: Some(2),
665 offset_override: Some(2),
671 };
666 };
672
667
673 assert_eq!(entry.offset(), 2)
668 assert_eq!(entry.offset(), 2)
674 }
669 }
675
670
676 #[test]
671 #[test]
    fn test_compressed_len() {
        let bytes = IndexEntryBuilder::new().with_compressed_len(1).build();
        let entry = IndexEntry {
            bytes: &bytes,
            offset_override: None,
        };

        assert_eq!(entry.compressed_len(), 1)
    }

    #[test]
    fn test_uncompressed_len() {
        let bytes = IndexEntryBuilder::new().with_uncompressed_len(1).build();
        let entry = IndexEntry {
            bytes: &bytes,
            offset_override: None,
        };

        assert_eq!(entry.uncompressed_len(), 1)
    }

    #[test]
    fn test_base_revision_or_base_of_delta_chain() {
        let bytes = IndexEntryBuilder::new()
            .with_base_revision_or_base_of_delta_chain(Revision(1))
            .build();
        let entry = IndexEntry {
            bytes: &bytes,
            offset_override: None,
        };

        assert_eq!(entry.base_revision_or_base_of_delta_chain(), 1.into())
    }

    #[test]
    fn link_revision_test() {
        let bytes = IndexEntryBuilder::new()
            .with_link_revision(Revision(123))
            .build();

        let entry = IndexEntry {
            bytes: &bytes,
            offset_override: None,
        };

        assert_eq!(entry.link_revision(), 123.into());
    }

    #[test]
    fn p1_test() {
        let bytes = IndexEntryBuilder::new().with_p1(Revision(123)).build();

        let entry = IndexEntry {
            bytes: &bytes,
            offset_override: None,
        };

        assert_eq!(entry.p1(), 123.into());
    }

    #[test]
    fn p2_test() {
        let bytes = IndexEntryBuilder::new().with_p2(Revision(123)).build();

        let entry = IndexEntry {
            bytes: &bytes,
            offset_override: None,
        };

        assert_eq!(entry.p2(), 123.into());
    }

    #[test]
    fn node_test() {
        let node = Node::from_hex("0123456789012345678901234567890123456789")
            .unwrap();
        let bytes = IndexEntryBuilder::new().with_node(node).build();

        let entry = IndexEntry {
            bytes: &bytes,
            offset_override: None,
        };

        assert_eq!(*entry.hash(), node);
    }

    #[test]
    fn version_test() {
        let bytes = IndexEntryBuilder::new()
            .is_first(true)
            .with_version(2)
            .build();

        assert_eq!(get_version(&bytes), 2)
    }
}

#[cfg(test)]
pub use tests::IndexEntryBuilder;
@@ -1,209 +1,213 b''
use crate::errors::HgError;
use crate::revlog::{Node, NodePrefix};
use crate::revlog::{Revlog, RevlogError};
use crate::utils::hg_path::HgPath;
use crate::utils::SliceExt;
use crate::vfs::Vfs;
-use crate::{Graph, GraphError, Revision, UncheckedRevision};
+use crate::{
+    Graph, GraphError, Revision, RevlogOpenOptions, UncheckedRevision,
+};

/// A specialized `Revlog` to work with `manifest` data format.
pub struct Manifestlog {
    /// The generic `revlog` format.
-    revlog: Revlog,
+    pub(crate) revlog: Revlog,
}

impl Graph for Manifestlog {
    fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
        self.revlog.parents(rev)
    }
}

impl Manifestlog {
    /// Open the `manifest` of a repository given by its root.
-    pub fn open(store_vfs: &Vfs, use_nodemap: bool) -> Result<Self, HgError> {
-        let revlog =
-            Revlog::open(store_vfs, "00manifest.i", None, use_nodemap)?;
+    pub fn open(
+        store_vfs: &Vfs,
+        options: RevlogOpenOptions,
+    ) -> Result<Self, HgError> {
+        let revlog = Revlog::open(store_vfs, "00manifest.i", None, options)?;
        Ok(Self { revlog })
    }

    /// Return the `Manifest` for the given node ID.
    ///
    /// Note: this is a node ID in the manifestlog, typically found through
    /// `ChangelogEntry::manifest_node`. It is *not* the node ID of any
    /// changeset.
    ///
    /// See also `Repo::manifest_for_node`
    pub fn data_for_node(
        &self,
        node: NodePrefix,
    ) -> Result<Manifest, RevlogError> {
        let rev = self.revlog.rev_from_node(node)?;
        self.data_for_checked_rev(rev)
    }

    /// Return the `Manifest` of a given revision number.
    ///
    /// Note: this is a revision number in the manifestlog, *not* of any
    /// changeset.
    ///
    /// See also `Repo::manifest_for_rev`
    pub fn data_for_rev(
        &self,
        rev: UncheckedRevision,
    ) -> Result<Manifest, RevlogError> {
        let bytes = self.revlog.get_rev_data(rev)?.into_owned();
        Ok(Manifest { bytes })
    }

    pub fn data_for_checked_rev(
        &self,
        rev: Revision,
    ) -> Result<Manifest, RevlogError> {
        let bytes =
            self.revlog.get_rev_data_for_checked_rev(rev)?.into_owned();
        Ok(Manifest { bytes })
    }
}
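
For illustration, a caller-side sketch of the new `Manifestlog::open` signature; the `store_vfs` value and the option settings below are assumptions for the example, not taken from this changeset:

    // Illustrative only: open the manifest log with explicit revlog options.
    // `store_vfs` is assumed to be a `Vfs` rooted at `.hg/store`.
    let options = RevlogOpenOptions {
        version: RevlogVersionOptions::V1 { generaldelta: true },
        use_nodemap: true,
    };
    let manifestlog = Manifestlog::open(&store_vfs, options)?;
    let manifest = manifestlog.data_for_rev(UncheckedRevision(0))?;
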

/// `Manifestlog` entry which knows how to interpret the `manifest` data bytes.
#[derive(Debug)]
pub struct Manifest {
    /// Format for a manifest: flat sequence of variable-size entries,
    /// sorted by path, each as:
    ///
    /// ```text
    /// <path> \0 <hex_node_id> <flags> \n
    /// ```
    ///
    /// The last entry is also terminated by a newline character.
    /// Flags is one of `b""` (the empty string), `b"x"`, `b"l"`, or `b"t"`.
    bytes: Vec<u8>,
}

impl Manifest {
    pub fn iter(
        &self,
    ) -> impl Iterator<Item = Result<ManifestEntry, HgError>> {
        self.bytes
            .split(|b| b == &b'\n')
            .filter(|line| !line.is_empty())
            .map(ManifestEntry::from_raw)
    }

    /// If the given path is in this manifest, return its filelog node ID
    pub fn find_by_path(
        &self,
        path: &HgPath,
    ) -> Result<Option<ManifestEntry>, HgError> {
        use std::cmp::Ordering::*;
        let path = path.as_bytes();
        // Both boundaries of this `&[u8]` slice are always at the boundary of
        // an entry
        let mut bytes = &*self.bytes;

        // Binary search algorithm derived from `[T]::binary_search_by`
        // <https://github.com/rust-lang/rust/blob/1.57.0/library/core/src/slice/mod.rs#L2221>
        // except we don’t have a slice of entries. Instead we jump to the
        // middle of the byte slice and look around for entry delimiters
        // (newlines).
        while let Some(entry_range) = Self::find_entry_near_middle_of(bytes)? {
            let (entry_path, rest) =
                ManifestEntry::split_path(&bytes[entry_range.clone()])?;
            let cmp = entry_path.cmp(path);
            if cmp == Less {
                let after_newline = entry_range.end + 1;
                bytes = &bytes[after_newline..];
            } else if cmp == Greater {
                bytes = &bytes[..entry_range.start];
            } else {
                return Ok(Some(ManifestEntry::from_path_and_rest(
                    entry_path, rest,
                )));
            }
        }
        Ok(None)
    }

    /// If there is at least one, return the byte range of an entry *excluding*
    /// the final newline.
    fn find_entry_near_middle_of(
        bytes: &[u8],
    ) -> Result<Option<std::ops::Range<usize>>, HgError> {
        let len = bytes.len();
        if len > 0 {
            let middle = bytes.len() / 2;
            // Integer division rounds down, so `middle < len`.
            let (before, after) = bytes.split_at(middle);
            let is_newline = |&byte: &u8| byte == b'\n';
            let entry_start = match before.iter().rposition(is_newline) {
                Some(i) => i + 1,
                None => 0, // We choose the first entry in `bytes`
            };
            let entry_end = match after.iter().position(is_newline) {
                Some(i) => {
                    // No `+ 1` here to exclude this newline from the range
                    middle + i
                }
                None => {
                    // In a well-formed manifest:
                    //
                    // * Since `len > 0`, `bytes` contains at least one entry
                    // * Every entry ends with a newline
                    // * Since `middle < len`, `after` contains at least the
                    //   newline at the end of the last entry of `bytes`.
                    //
                    // We didn’t find a newline, so this manifest is not
                    // well-formed.
                    return Err(HgError::corrupted(
                        "manifest entry without \\n delimiter",
                    ));
                }
            };
            Ok(Some(entry_start..entry_end))
        } else {
            // len == 0
            Ok(None)
        }
    }
}
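
The `find_by_path`/`find_entry_near_middle_of` pair above adapts slice bisection to a flat, newline-delimited byte buffer: jump to the middle, widen to the enclosing record's boundaries, compare that record's path, and discard half the buffer. A self-contained sketch of the same idea, simplified to plain byte slices with no error handling (`find` is a hypothetical helper, not part of this file):

    /// Find `path` in newline-delimited, path-sorted `path\0payload` records.
    /// Simplified sketch of the bisection used by `Manifest::find_by_path`.
    fn find(mut bytes: &[u8], path: &[u8]) -> Option<Vec<u8>> {
        while !bytes.is_empty() {
            let middle = bytes.len() / 2;
            // Widen the midpoint to the enclosing record's boundaries.
            let start = bytes[..middle]
                .iter()
                .rposition(|&b| b == b'\n')
                .map_or(0, |i| i + 1);
            let end =
                middle + bytes[middle..].iter().position(|&b| b == b'\n')?;
            let record = &bytes[start..end];
            let sep = record.iter().position(|&b| b == b'\0')?;
            match record[..sep].cmp(path) {
                std::cmp::Ordering::Less => bytes = &bytes[end + 1..],
                std::cmp::Ordering::Greater => bytes = &bytes[..start],
                std::cmp::Ordering::Equal => {
                    return Some(record[sep + 1..].to_vec())
                }
            }
        }
        None
    }

For example, `find(b"bar\0b1\nfoo\0f1\n", b"foo")` returns `Some(b"f1".to_vec())`.
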

/// `Manifestlog` entry which knows how to interpret the `manifest` data bytes.
#[derive(Debug)]
pub struct ManifestEntry<'manifest> {
    pub path: &'manifest HgPath,
    pub hex_node_id: &'manifest [u8],

    /// `Some` values are b'x', b'l', or b't'
    pub flags: Option<u8>,
}

impl<'a> ManifestEntry<'a> {
    fn split_path(bytes: &[u8]) -> Result<(&[u8], &[u8]), HgError> {
        bytes.split_2(b'\0').ok_or_else(|| {
            HgError::corrupted("manifest entry without \\0 delimiter")
        })
    }

    fn from_path_and_rest(path: &'a [u8], rest: &'a [u8]) -> Self {
        let (hex_node_id, flags) = match rest.split_last() {
            Some((&b'x', rest)) => (rest, Some(b'x')),
            Some((&b'l', rest)) => (rest, Some(b'l')),
            Some((&b't', rest)) => (rest, Some(b't')),
            _ => (rest, None),
        };
        Self {
            path: HgPath::new(path),
            hex_node_id,
            flags,
        }
    }

    fn from_raw(bytes: &'a [u8]) -> Result<Self, HgError> {
        let (path, rest) = Self::split_path(bytes)?;
        Ok(Self::from_path_and_rest(path, rest))
    }

    pub fn node_id(&self) -> Result<Node, HgError> {
        Node::from_hex_for_repo(self.hex_node_id)
    }
}
@@ -1,965 +1,1030 b''
// Copyright 2018-2023 Georges Racinet <georges.racinet@octobus.net>
// and Mercurial contributors
//
// This software may be used and distributed according to the terms of the
// GNU General Public License version 2 or any later version.
//! Mercurial concepts for handling revision history

pub mod node;
pub mod nodemap;
mod nodemap_docket;
pub mod path_encode;
pub use node::{FromHexError, Node, NodePrefix};
pub mod changelog;
pub mod filelog;
pub mod index;
pub mod manifest;
pub mod patch;

use std::borrow::Cow;
use std::io::Read;
use std::ops::Deref;
use std::path::Path;

use flate2::read::ZlibDecoder;
use sha1::{Digest, Sha1};
use std::cell::RefCell;
use zstd;

use self::node::{NODE_BYTES_LENGTH, NULL_NODE};
use self::nodemap_docket::NodeMapDocket;
use super::index::Index;
use super::nodemap::{NodeMap, NodeMapError};
use crate::errors::HgError;
use crate::vfs::Vfs;

/// As noted in revlog.c, revision numbers are actually encoded in
/// 4 bytes, and are liberally converted to ints, whence the i32
pub type BaseRevision = i32;

/// Mercurial revision numbers
/// In contrast to the more general [`UncheckedRevision`], these are "checked"
/// in the sense that they should only be used for revisions that are
/// valid for a given index (i.e. in bounds).
#[derive(
    Debug,
    derive_more::Display,
    Clone,
    Copy,
    Hash,
    PartialEq,
    Eq,
    PartialOrd,
    Ord,
)]
pub struct Revision(pub BaseRevision);

impl format_bytes::DisplayBytes for Revision {
    fn display_bytes(
        &self,
        output: &mut dyn std::io::Write,
    ) -> std::io::Result<()> {
        self.0.display_bytes(output)
    }
}

/// Unchecked Mercurial revision numbers.
///
/// Values of this type have no guarantee of being a valid revision number
/// in any context. Use method `check_revision` to get a valid revision within
/// the appropriate index object.
#[derive(
    Debug,
    derive_more::Display,
    Clone,
    Copy,
    Hash,
    PartialEq,
    Eq,
    PartialOrd,
    Ord,
)]
pub struct UncheckedRevision(pub BaseRevision);

impl format_bytes::DisplayBytes for UncheckedRevision {
    fn display_bytes(
        &self,
        output: &mut dyn std::io::Write,
    ) -> std::io::Result<()> {
        self.0.display_bytes(output)
    }
}

impl From<Revision> for UncheckedRevision {
    fn from(value: Revision) -> Self {
        Self(value.0)
    }
}

impl From<BaseRevision> for UncheckedRevision {
    fn from(value: BaseRevision) -> Self {
        Self(value)
    }
}

/// Marker expressing the absence of a parent
///
/// Independently of the actual representation, `NULL_REVISION` is guaranteed
/// to be smaller than all existing revisions.
pub const NULL_REVISION: Revision = Revision(-1);

/// Same as `mercurial.node.wdirrev`
///
/// This is also equal to `i32::max_value()`, but it's better to spell
/// it out explicitly, same as in `mercurial.node`
#[allow(clippy::unreadable_literal)]
pub const WORKING_DIRECTORY_REVISION: UncheckedRevision =
    UncheckedRevision(0x7fffffff);

pub const WORKING_DIRECTORY_HEX: &str =
    "ffffffffffffffffffffffffffffffffffffffff";

/// The simplest expression of what we need of Mercurial DAGs.
pub trait Graph {
    /// Return the two parents of the given `Revision`.
    ///
    /// Each of the parents can be independently `NULL_REVISION`
    fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError>;
}

#[derive(Clone, Debug, PartialEq)]
pub enum GraphError {
    ParentOutOfRange(Revision),
}

/// The Mercurial Revlog Index
///
/// This is currently limited to the minimal interface that is needed for
/// the [`nodemap`](nodemap/index.html) module
pub trait RevlogIndex {
    /// Total number of Revisions referenced in this index
    fn len(&self) -> usize;

    fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Return a reference to the Node or `None` for `NULL_REVISION`
    fn node(&self, rev: Revision) -> Option<&Node>;

    /// Return a [`Revision`] if `rev` is a valid revision number for this
    /// index
    fn check_revision(&self, rev: UncheckedRevision) -> Option<Revision> {
        let rev = rev.0;

        if rev == NULL_REVISION.0 || (rev >= 0 && (rev as usize) < self.len())
        {
            Some(Revision(rev))
        } else {
            None
        }
    }
}

const REVISION_FLAG_CENSORED: u16 = 1 << 15;
const REVISION_FLAG_ELLIPSIS: u16 = 1 << 14;
const REVISION_FLAG_EXTSTORED: u16 = 1 << 13;
const REVISION_FLAG_HASCOPIESINFO: u16 = 1 << 12;

// Keep this in sync with REVIDX_KNOWN_FLAGS in
// mercurial/revlogutils/flagutil.py
const REVIDX_KNOWN_FLAGS: u16 = REVISION_FLAG_CENSORED
    | REVISION_FLAG_ELLIPSIS
    | REVISION_FLAG_EXTSTORED
    | REVISION_FLAG_HASCOPIESINFO;

const NULL_REVLOG_ENTRY_FLAGS: u16 = 0;

#[derive(Debug, derive_more::From, derive_more::Display)]
pub enum RevlogError {
    InvalidRevision,
    /// Working directory is not supported
    WDirUnsupported,
    /// Found more than one entry whose ID matches the requested prefix
    AmbiguousPrefix,
    #[from]
    Other(HgError),
}

impl From<NodeMapError> for RevlogError {
    fn from(error: NodeMapError) -> Self {
        match error {
            NodeMapError::MultipleResults => RevlogError::AmbiguousPrefix,
            NodeMapError::RevisionNotInIndex(rev) => RevlogError::corrupted(
                format!("nodemap point to revision {} not in index", rev),
            ),
        }
    }
}

fn corrupted<S: AsRef<str>>(context: S) -> HgError {
    HgError::corrupted(format!("corrupted revlog, {}", context.as_ref()))
}

impl RevlogError {
    fn corrupted<S: AsRef<str>>(context: S) -> Self {
        RevlogError::Other(corrupted(context))
    }
}

/// Read only implementation of revlog.
pub struct Revlog {
    /// When index and data are not interleaved: bytes of the revlog index.
    /// When index and data are interleaved: bytes of the revlog index and
    /// data.
    index: Index,
    /// When index and data are not interleaved: bytes of the revlog data
    data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>>,
    /// When present on disk: the persistent nodemap for this revlog
    nodemap: Option<nodemap::NodeTree>,
}

impl Graph for Revlog {
    fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
        self.index.parents(rev)
    }
}

+#[derive(Debug, Copy, Clone)]
+pub enum RevlogVersionOptions {
+    V0,
+    V1 { generaldelta: bool },
+    V2,
+    ChangelogV2 { compute_rank: bool },
+}
+
+/// Options to govern how a revlog should be opened, usually from the
+/// repository configuration or requirements.
+#[derive(Debug, Copy, Clone)]
+pub struct RevlogOpenOptions {
+    /// The revlog version, along with any option specific to this version
+    pub version: RevlogVersionOptions,
+    /// Whether the revlog uses a persistent nodemap.
+    pub use_nodemap: bool,
+    // TODO other non-header/version options,
+}
+
+impl RevlogOpenOptions {
+    pub fn new() -> Self {
+        Self {
+            version: RevlogVersionOptions::V1 { generaldelta: true },
+            use_nodemap: false,
+        }
+    }
+
+    fn default_index_header(&self) -> index::IndexHeader {
+        index::IndexHeader {
+            header_bytes: match self.version {
+                RevlogVersionOptions::V0 => [0, 0, 0, 0],
+                RevlogVersionOptions::V1 { generaldelta } => {
+                    [0, if generaldelta { 3 } else { 1 }, 0, 1]
+                }
+                RevlogVersionOptions::V2 => 0xDEADu32.to_be_bytes(),
+                RevlogVersionOptions::ChangelogV2 { compute_rank: _ } => {
+                    0xD34Du32.to_be_bytes()
+                }
+            },
+        }
+    }
+}
+
+impl Default for RevlogOpenOptions {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
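
For reference, the four `header_bytes` above form the first index record's 32-bit header: the high 16 bits carry the revlog flags and the low 16 bits the version, both big-endian. That is why V1 with generaldelta is `[0, 3, 0, 1]` (flags inline | generaldelta = 0b11, version 1), while V2 and ChangelogV2 use the `0xDEAD` and `0xD34D` version markers with no flags. A small decoding sketch (the `decode_header` helper is hypothetical):

    // Hypothetical helper: split a 4-byte revlog index header into
    // (flags, version); both are big-endian u16 halves of one u32.
    fn decode_header(header_bytes: [u8; 4]) -> (u16, u16) {
        let flags = u16::from_be_bytes([header_bytes[0], header_bytes[1]]);
        let version = u16::from_be_bytes([header_bytes[2], header_bytes[3]]);
        (flags, version)
    }

    fn main() {
        // V1 + generaldelta: flags = inline (1) | generaldelta (2), version 1.
        assert_eq!(decode_header([0, 3, 0, 1]), (0b11, 1));
        // V1 without generaldelta: inline flag only.
        assert_eq!(decode_header([0, 1, 0, 1]), (0b01, 1));
    }
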
impl Revlog {
    /// Open a revlog index file.
    ///
    /// It will also open the associated data file if index and data are not
    /// interleaved.
    pub fn open(
        store_vfs: &Vfs,
        index_path: impl AsRef<Path>,
        data_path: Option<&Path>,
-        use_nodemap: bool,
+        options: RevlogOpenOptions,
    ) -> Result<Self, HgError> {
-        Self::open_gen(store_vfs, index_path, data_path, use_nodemap, None)
+        Self::open_gen(store_vfs, index_path, data_path, options, None)
    }

    fn open_gen(
        store_vfs: &Vfs,
        index_path: impl AsRef<Path>,
        data_path: Option<&Path>,
-        use_nodemap: bool,
+        options: RevlogOpenOptions,
        nodemap_for_test: Option<nodemap::NodeTree>,
    ) -> Result<Self, HgError> {
        let index_path = index_path.as_ref();
        let index = {
            match store_vfs.mmap_open_opt(index_path)? {
-                None => Index::new(Box::<Vec<_>>::default()),
+                None => Index::new(
+                    Box::<Vec<_>>::default(),
+                    options.default_index_header(),
+                ),
                Some(index_mmap) => {
-                    let index = Index::new(Box::new(index_mmap))?;
+                    let index = Index::new(
+                        Box::new(index_mmap),
+                        options.default_index_header(),
+                    )?;
                    Ok(index)
                }
            }
        }?;

        let default_data_path = index_path.with_extension("d");

        // type annotation required
        // won't recognize Mmap as Deref<Target = [u8]>
        let data_bytes: Option<Box<dyn Deref<Target = [u8]> + Send>> =
            if index.is_inline() {
                None
            } else {
                let data_path = data_path.unwrap_or(&default_data_path);
                let data_mmap = store_vfs.mmap_open(data_path)?;
                Some(Box::new(data_mmap))
            };

-        let nodemap = if index.is_inline() || !use_nodemap {
+        let nodemap = if index.is_inline() || !options.use_nodemap {
            None
        } else {
            NodeMapDocket::read_from_file(store_vfs, index_path)?.map(
                |(docket, data)| {
                    nodemap::NodeTree::load_bytes(
                        Box::new(data),
                        docket.data_length,
                    )
                },
            )
        };

        let nodemap = nodemap_for_test.or(nodemap);

        Ok(Revlog {
            index,
            data_bytes,
            nodemap,
        })
    }

    /// Return the number of entries of the `Revlog`.
    pub fn len(&self) -> usize {
        self.index.len()
    }

    /// Returns `true` if the `Revlog` has zero `entries`.
    pub fn is_empty(&self) -> bool {
        self.index.is_empty()
    }

    /// Returns the node ID for the given revision number, if it exists in
    /// this revlog
    pub fn node_from_rev(&self, rev: UncheckedRevision) -> Option<&Node> {
        if rev == NULL_REVISION.into() {
            return Some(&NULL_NODE);
        }
        let rev = self.index.check_revision(rev)?;
        Some(self.index.get_entry(rev)?.hash())
    }

    /// Return the revision number for the given node ID, if it exists in
    /// this revlog
    pub fn rev_from_node(
        &self,
        node: NodePrefix,
    ) -> Result<Revision, RevlogError> {
        if let Some(nodemap) = &self.nodemap {
            nodemap
                .find_bin(&self.index, node)?
                .ok_or(RevlogError::InvalidRevision)
        } else {
            self.rev_from_node_no_persistent_nodemap(node)
        }
    }

    /// Same as `rev_from_node`, without using a persistent nodemap
    ///
    /// This is used as a fallback when a persistent nodemap is not present.
    /// This happens when the persistent-nodemap experimental feature is not
    /// enabled, or for small revlogs.
    fn rev_from_node_no_persistent_nodemap(
        &self,
        node: NodePrefix,
    ) -> Result<Revision, RevlogError> {
        // Linear scan of the revlog
        // TODO: consider building a non-persistent nodemap in memory to
        // optimize these cases.
        let mut found_by_prefix = None;
        for rev in (-1..self.len() as BaseRevision).rev() {
            let rev = Revision(rev as BaseRevision);
            let candidate_node = if rev == Revision(-1) {
                NULL_NODE
            } else {
                let index_entry =
                    self.index.get_entry(rev).ok_or_else(|| {
                        HgError::corrupted(
                            "revlog references a revision not in the index",
                        )
                    })?;
                *index_entry.hash()
            };
            if node == candidate_node {
                return Ok(rev);
            }
            if node.is_prefix_of(&candidate_node) {
                if found_by_prefix.is_some() {
                    return Err(RevlogError::AmbiguousPrefix);
                }
                found_by_prefix = Some(rev)
            }
        }
        found_by_prefix.ok_or(RevlogError::InvalidRevision)
    }
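
Put differently: the fallback scan walks from tip down to the null revision, returns immediately on an exact match, and accepts a prefix match only if it is unique. A hedged caller-side sketch (the `revlog` value and the hex prefix are made up, and `NodePrefix::from_hex` is assumed to be the parsing entry point):

    // Illustrative only: resolve an abbreviated node ID to a revision.
    let prefix = NodePrefix::from_hex("0123abc").expect("valid hex prefix");
    match revlog.rev_from_node(prefix) {
        Ok(rev) => println!("resolved to revision {}", rev),
        Err(RevlogError::AmbiguousPrefix) => {
            println!("more than one node starts with this prefix")
        }
        Err(e) => println!("lookup failed: {:?}", e),
    }
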

    /// Returns whether the given revision exists in this revlog.
    pub fn has_rev(&self, rev: UncheckedRevision) -> bool {
        self.index.check_revision(rev).is_some()
    }

    /// Return the full data associated to a revision.
    ///
    /// All entries required to build the final data out of deltas will be
    /// retrieved as needed, and the deltas will be applied to the initial
    /// snapshot to rebuild the final data.
    pub fn get_rev_data(
        &self,
        rev: UncheckedRevision,
    ) -> Result<Cow<[u8]>, RevlogError> {
        if rev == NULL_REVISION.into() {
            return Ok(Cow::Borrowed(&[]));
        };
        self.get_entry(rev)?.data()
    }

    /// [`Self::get_rev_data`] for checked revisions.
    pub fn get_rev_data_for_checked_rev(
        &self,
        rev: Revision,
    ) -> Result<Cow<[u8]>, RevlogError> {
        if rev == NULL_REVISION {
            return Ok(Cow::Borrowed(&[]));
        };
        self.get_entry_for_checked_rev(rev)?.data()
    }

    /// Check the hash of some given data against the recorded hash.
    pub fn check_hash(
        &self,
        p1: Revision,
        p2: Revision,
        expected: &[u8],
        data: &[u8],
    ) -> bool {
        let e1 = self.index.get_entry(p1);
        let h1 = match e1 {
            Some(ref entry) => entry.hash(),
            None => &NULL_NODE,
        };
        let e2 = self.index.get_entry(p2);
        let h2 = match e2 {
            Some(ref entry) => entry.hash(),
            None => &NULL_NODE,
        };

        hash(data, h1.as_bytes(), h2.as_bytes()) == expected
    }

    /// Build the full data of a revision out of its snapshot
    /// and its deltas.
    fn build_data_from_deltas(
        snapshot: RevlogEntry,
        deltas: &[RevlogEntry],
    ) -> Result<Vec<u8>, HgError> {
        let snapshot = snapshot.data_chunk()?;
        let deltas = deltas
            .iter()
            .rev()
            .map(RevlogEntry::data_chunk)
            .collect::<Result<Vec<_>, _>>()?;
        let patches: Vec<_> =
            deltas.iter().map(|d| patch::PatchList::new(d)).collect();
        let patch = patch::fold_patch_lists(&patches);
        Ok(patch.apply(&snapshot))
    }

    /// Return the revlog data.
    fn data(&self) -> &[u8] {
        match &self.data_bytes {
            Some(data_bytes) => data_bytes,
            None => panic!(
                "forgot to load the data or trying to access inline data"
            ),
        }
    }

    pub fn make_null_entry(&self) -> RevlogEntry {
        RevlogEntry {
            revlog: self,
            rev: NULL_REVISION,
            bytes: b"",
            compressed_len: 0,
            uncompressed_len: 0,
            base_rev_or_base_of_delta_chain: None,
            p1: NULL_REVISION,
            p2: NULL_REVISION,
            flags: NULL_REVLOG_ENTRY_FLAGS,
            hash: NULL_NODE,
        }
    }

    fn get_entry_for_checked_rev(
        &self,
        rev: Revision,
    ) -> Result<RevlogEntry, RevlogError> {
        if rev == NULL_REVISION {
            return Ok(self.make_null_entry());
        }
        let index_entry = self
            .index
            .get_entry(rev)
            .ok_or(RevlogError::InvalidRevision)?;
        let start = index_entry.offset();
        let end = start + index_entry.compressed_len() as usize;
        let data = if self.index.is_inline() {
            self.index.data(start, end)
        } else {
            &self.data()[start..end]
        };
        let base_rev = self
            .index
            .check_revision(index_entry.base_revision_or_base_of_delta_chain())
            .ok_or_else(|| {
                RevlogError::corrupted(format!(
                    "base revision for rev {} is invalid",
                    rev
                ))
            })?;
        let p1 =
            self.index.check_revision(index_entry.p1()).ok_or_else(|| {
                RevlogError::corrupted(format!(
                    "p1 for rev {} is invalid",
                    rev
                ))
            })?;
        let p2 =
            self.index.check_revision(index_entry.p2()).ok_or_else(|| {
                RevlogError::corrupted(format!(
                    "p2 for rev {} is invalid",
                    rev
                ))
            })?;
        let entry = RevlogEntry {
            revlog: self,
            rev,
            bytes: data,
            compressed_len: index_entry.compressed_len(),
            uncompressed_len: index_entry.uncompressed_len(),
            base_rev_or_base_of_delta_chain: if base_rev == rev {
                None
            } else {
                Some(base_rev)
            },
            p1,
            p2,
            flags: index_entry.flags(),
            hash: *index_entry.hash(),
        };
        Ok(entry)
    }

    /// Get an entry of the revlog.
    pub fn get_entry(
        &self,
        rev: UncheckedRevision,
    ) -> Result<RevlogEntry, RevlogError> {
        if rev == NULL_REVISION.into() {
            return Ok(self.make_null_entry());
        }
        let rev = self.index.check_revision(rev).ok_or_else(|| {
            RevlogError::corrupted(format!("rev {} is invalid", rev))
        })?;
        self.get_entry_for_checked_rev(rev)
    }
}

/// The revlog entry's bytes and the necessary information to extract
/// the entry's data.
#[derive(Clone)]
pub struct RevlogEntry<'revlog> {
    revlog: &'revlog Revlog,
    rev: Revision,
    bytes: &'revlog [u8],
    compressed_len: u32,
    uncompressed_len: i32,
    base_rev_or_base_of_delta_chain: Option<Revision>,
    p1: Revision,
    p2: Revision,
    flags: u16,
    hash: Node,
}

thread_local! {
    // seems fine to [unwrap] here: this can only fail due to memory allocation
    // failing, and it's normal for that to cause panic.
    static ZSTD_DECODER : RefCell<zstd::bulk::Decompressor<'static>> =
        RefCell::new(zstd::bulk::Decompressor::new().ok().unwrap());
}

fn zstd_decompress_to_buffer(
    bytes: &[u8],
    buf: &mut Vec<u8>,
) -> Result<usize, std::io::Error> {
    ZSTD_DECODER
        .with(|decoder| decoder.borrow_mut().decompress_to_buffer(bytes, buf))
}

impl<'revlog> RevlogEntry<'revlog> {
    pub fn revision(&self) -> Revision {
        self.rev
    }

    pub fn node(&self) -> &Node {
        &self.hash
    }

    pub fn uncompressed_len(&self) -> Option<u32> {
        u32::try_from(self.uncompressed_len).ok()
    }

    pub fn has_p1(&self) -> bool {
        self.p1 != NULL_REVISION
    }

    pub fn p1_entry(
        &self,
    ) -> Result<Option<RevlogEntry<'revlog>>, RevlogError> {
        if self.p1 == NULL_REVISION {
            Ok(None)
        } else {
            Ok(Some(self.revlog.get_entry_for_checked_rev(self.p1)?))
        }
    }

    pub fn p2_entry(
        &self,
    ) -> Result<Option<RevlogEntry<'revlog>>, RevlogError> {
        if self.p2 == NULL_REVISION {
            Ok(None)
        } else {
            Ok(Some(self.revlog.get_entry_for_checked_rev(self.p2)?))
        }
    }

    pub fn p1(&self) -> Option<Revision> {
        if self.p1 == NULL_REVISION {
            None
        } else {
            Some(self.p1)
        }
    }

    pub fn p2(&self) -> Option<Revision> {
        if self.p2 == NULL_REVISION {
            None
        } else {
            Some(self.p2)
        }
    }

    pub fn is_censored(&self) -> bool {
        (self.flags & REVISION_FLAG_CENSORED) != 0
    }

    pub fn has_length_affecting_flag_processor(&self) -> bool {
        // Relevant Python code: revlog.size()
        // note: ELLIPSIS is known to not change the content
        (self.flags & (REVIDX_KNOWN_FLAGS ^ REVISION_FLAG_ELLIPSIS)) != 0
    }

    /// The data for this entry, after resolving deltas if any.
    pub fn rawdata(&self) -> Result<Cow<'revlog, [u8]>, RevlogError> {
        let mut entry = self.clone();
        let mut delta_chain = vec![];

        // The meaning of `base_rev_or_base_of_delta_chain` depends on
        // generaldelta. See the doc on `ENTRY_DELTA_BASE` in
        // `mercurial/revlogutils/constants.py` and the code in
        // [_chaininfo] and in [index_deltachain].
        let uses_generaldelta = self.revlog.index.uses_generaldelta();
        while let Some(base_rev) = entry.base_rev_or_base_of_delta_chain {
            entry = if uses_generaldelta {
                delta_chain.push(entry);
                self.revlog.get_entry_for_checked_rev(base_rev)?
            } else {
                let base_rev = UncheckedRevision(entry.rev.0 - 1);
                delta_chain.push(entry);
                self.revlog.get_entry(base_rev)?
            };
        }

        let data = if delta_chain.is_empty() {
            entry.data_chunk()?
        } else {
            Revlog::build_data_from_deltas(entry, &delta_chain)?.into()
        };

        Ok(data)
    }

    fn check_data(
        &self,
        data: Cow<'revlog, [u8]>,
    ) -> Result<Cow<'revlog, [u8]>, RevlogError> {
        if self.revlog.check_hash(
            self.p1,
            self.p2,
            self.hash.as_bytes(),
            &data,
        ) {
            Ok(data)
        } else {
            if (self.flags & REVISION_FLAG_ELLIPSIS) != 0 {
                return Err(HgError::unsupported(
                    "ellipsis revisions are not supported by rhg",
                )
                .into());
            }
            Err(corrupted(format!(
                "hash check failed for revision {}",
                self.rev
            ))
            .into())
        }
    }

    pub fn data(&self) -> Result<Cow<'revlog, [u8]>, RevlogError> {
        let data = self.rawdata()?;
        if self.rev == NULL_REVISION {
            return Ok(data);
        }
        if self.is_censored() {
            return Err(HgError::CensoredNodeError.into());
        }
        self.check_data(data)
    }

    /// Extract the data contained in the entry.
    /// This may be a delta. (See `is_delta`.)
    fn data_chunk(&self) -> Result<Cow<'revlog, [u8]>, HgError> {
        if self.bytes.is_empty() {
            return Ok(Cow::Borrowed(&[]));
        }
        match self.bytes[0] {
            // Revision data is the entirety of the entry, including this
            // header.
            b'\0' => Ok(Cow::Borrowed(self.bytes)),
            // Raw revision data follows.
            b'u' => Ok(Cow::Borrowed(&self.bytes[1..])),
            // zlib (RFC 1950) data.
            b'x' => Ok(Cow::Owned(self.uncompressed_zlib_data()?)),
            // zstd data.
            b'\x28' => Ok(Cow::Owned(self.uncompressed_zstd_data()?)),
            // A proper new format should have had a repo/store requirement.
            format_type => Err(corrupted(format!(
                "unknown compression header '{}'",
                format_type
            ))),
        }
    }
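
This one-byte dispatch works because each chunk starts either with a revlog-specific marker (`\0`, or `u` for data stored uncompressed) or with the first byte of the compressor's own framing: a zlib stream conventionally starts with the CMF byte 0x78, which is ASCII 'x', and the zstd magic number 0xFD2FB528, stored little-endian, starts with byte 0x28. A standalone classification sketch (the `Compression` enum is an assumption, not part of hg-core):

    // Illustrative classification of a revlog chunk by its first byte.
    enum Compression {
        None,
        Zlib,
        Zstd,
        Unknown(u8),
    }

    fn classify(chunk: &[u8]) -> Compression {
        match chunk.first().copied() {
            None | Some(b'\0') | Some(b'u') => Compression::None,
            // zlib: the CMF byte 0x78 is ASCII 'x'.
            Some(b'x') => Compression::Zlib,
            // zstd: magic 0xFD2FB528 stored little-endian, first byte 0x28.
            Some(0x28) => Compression::Zstd,
            Some(other) => Compression::Unknown(other),
        }
    }
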
724
779
725 fn uncompressed_zlib_data(&self) -> Result<Vec<u8>, HgError> {
780 fn uncompressed_zlib_data(&self) -> Result<Vec<u8>, HgError> {
726 let mut decoder = ZlibDecoder::new(self.bytes);
781 let mut decoder = ZlibDecoder::new(self.bytes);
727 if self.is_delta() {
782 if self.is_delta() {
728 let mut buf = Vec::with_capacity(self.compressed_len as usize);
783 let mut buf = Vec::with_capacity(self.compressed_len as usize);
729 decoder
784 decoder
730 .read_to_end(&mut buf)
785 .read_to_end(&mut buf)
731 .map_err(|e| corrupted(e.to_string()))?;
786 .map_err(|e| corrupted(e.to_string()))?;
732 Ok(buf)
787 Ok(buf)
733 } else {
788 } else {
734 let cap = self.uncompressed_len.max(0) as usize;
789 let cap = self.uncompressed_len.max(0) as usize;
735 let mut buf = vec![0; cap];
790 let mut buf = vec![0; cap];
736 decoder
791 decoder
737 .read_exact(&mut buf)
792 .read_exact(&mut buf)
738 .map_err(|e| corrupted(e.to_string()))?;
793 .map_err(|e| corrupted(e.to_string()))?;
739 Ok(buf)
794 Ok(buf)
740 }
795 }
741 }
796 }
742
797
743 fn uncompressed_zstd_data(&self) -> Result<Vec<u8>, HgError> {
798 fn uncompressed_zstd_data(&self) -> Result<Vec<u8>, HgError> {
744 let cap = self.uncompressed_len.max(0) as usize;
799 let cap = self.uncompressed_len.max(0) as usize;
745 if self.is_delta() {
800 if self.is_delta() {
746 // [cap] is usually an over-estimate of the space needed because
801 // [cap] is usually an over-estimate of the space needed because
747 // it's the length of delta-decoded data, but we're interested
802 // it's the length of delta-decoded data, but we're interested
748 // in the size of the delta.
803 // in the size of the delta.
749 // This means we have to [shrink_to_fit] to avoid holding on
804 // This means we have to [shrink_to_fit] to avoid holding on
750 // to a large chunk of memory, but it also means we must have a
805 // to a large chunk of memory, but it also means we must have a
751 // fallback branch, for the case when the delta is longer than
806 // fallback branch, for the case when the delta is longer than
752 // the original data (surprisingly, this does happen in practice).
807 // the original data (surprisingly, this does happen in practice).
753 let mut buf = Vec::with_capacity(cap);
808 let mut buf = Vec::with_capacity(cap);
754 match zstd_decompress_to_buffer(self.bytes, &mut buf) {
809 match zstd_decompress_to_buffer(self.bytes, &mut buf) {
755 Ok(_) => buf.shrink_to_fit(),
810 Ok(_) => buf.shrink_to_fit(),
756 Err(_) => {
811 Err(_) => {
757 buf.clear();
812 buf.clear();
758 zstd::stream::copy_decode(self.bytes, &mut buf)
813 zstd::stream::copy_decode(self.bytes, &mut buf)
759 .map_err(|e| corrupted(e.to_string()))?;
814 .map_err(|e| corrupted(e.to_string()))?;
760 }
815 }
761 };
816 };
762 Ok(buf)
817 Ok(buf)
763 } else {
818 } else {
764 let mut buf = Vec::with_capacity(cap);
819 let mut buf = Vec::with_capacity(cap);
765 let len = zstd_decompress_to_buffer(self.bytes, &mut buf)
820 let len = zstd_decompress_to_buffer(self.bytes, &mut buf)
766 .map_err(|e| corrupted(e.to_string()))?;
821 .map_err(|e| corrupted(e.to_string()))?;
767 if len != self.uncompressed_len as usize {
822 if len != self.uncompressed_len as usize {
768 Err(corrupted("uncompressed length does not match"))
823 Err(corrupted("uncompressed length does not match"))
769 } else {
824 } else {
770 Ok(buf)
825 Ok(buf)
771 }
826 }
772 }
827 }
773 }
828 }
774
829
775 /// Tell if the entry is a snapshot or a delta
830 /// Tell if the entry is a snapshot or a delta
776 /// (this influences decompression).
831 /// (this influences decompression).
777 fn is_delta(&self) -> bool {
832 fn is_delta(&self) -> bool {
778 self.base_rev_or_base_of_delta_chain.is_some()
833 self.base_rev_or_base_of_delta_chain.is_some()
779 }
834 }
780 }
835 }
781
836
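A note on the dispatch above: data_chunk() keys everything off a single
header byte at the start of each on-disk chunk. A minimal, self-contained
sketch of that dispatch (classify_chunk is illustrative only, not part of
the hg-core API):

    fn classify_chunk(chunk: &[u8]) -> &'static str {
        match chunk.first().copied() {
            // An empty chunk encodes the empty revision.
            None => "empty",
            // Revision data is the whole entry, header byte included.
            Some(b'\0') => "plain, header kept",
            // Raw revision data follows the header byte.
            Some(b'u') => "plain, header skipped",
            // A zlib (RFC 1950) stream.
            Some(b'x') => "zlib",
            // First byte of the zstd frame magic (0x28 0xB5 0x2F 0xFD).
            Some(0x28) => "zstd",
            // Anything else means corruption or an unsupported format.
            Some(_) => "unknown",
        }
    }

    fn main() {
        assert_eq!(classify_chunk(b"x\x9c"), "zlib");
        assert_eq!(classify_chunk(b""), "empty");
    }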
782 /// Calculate the hash of a revision given its data and its parents.
837 /// Calculate the hash of a revision given its data and its parents.
783 fn hash(
838 fn hash(
784 data: &[u8],
839 data: &[u8],
785 p1_hash: &[u8],
840 p1_hash: &[u8],
786 p2_hash: &[u8],
841 p2_hash: &[u8],
787 ) -> [u8; NODE_BYTES_LENGTH] {
842 ) -> [u8; NODE_BYTES_LENGTH] {
788 let mut hasher = Sha1::new();
843 let mut hasher = Sha1::new();
789 let (a, b) = (p1_hash, p2_hash);
844 let (a, b) = (p1_hash, p2_hash);
790 if a > b {
845 if a > b {
791 hasher.update(b);
846 hasher.update(b);
792 hasher.update(a);
847 hasher.update(a);
793 } else {
848 } else {
794 hasher.update(a);
849 hasher.update(a);
795 hasher.update(b);
850 hasher.update(b);
796 }
851 }
797 hasher.update(data);
852 hasher.update(data);
798 *hasher.finalize().as_ref()
853 *hasher.finalize().as_ref()
799 }
854 }
800
855
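The parent ordering in hash() above matches Mercurial's historical node-id
convention: the lexicographically smaller parent hash is fed to SHA-1
first, then the larger one, then the revision data. A standalone
restatement with concrete types (node_id is an illustrative name; the
Digest calls mirror the ones visible above, assuming the RustCrypto sha1
crate):

    use sha1::{Digest, Sha1};

    // Node id = SHA-1(min(p1, p2) || max(p1, p2) || data).
    fn node_id(data: &[u8], p1: &[u8; 20], p2: &[u8; 20]) -> [u8; 20] {
        let (first, second) = if p1 <= p2 { (p1, p2) } else { (p2, p1) };
        let mut hasher = Sha1::new();
        hasher.update(first);
        hasher.update(second);
        hasher.update(data);
        hasher.finalize().into()
    }

    fn main() {
        // With two null parents this degenerates to hashing forty zero
        // bytes followed by the data.
        let null = [0u8; 20];
        assert_eq!(node_id(b"hello", &null, &null).len(), 20);
    }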
801 #[cfg(test)]
856 #[cfg(test)]
802 mod tests {
857 mod tests {
803 use super::*;
858 use super::*;
804 use crate::index::{IndexEntryBuilder, INDEX_ENTRY_SIZE};
859 use crate::index::{IndexEntryBuilder, INDEX_ENTRY_SIZE};
805 use itertools::Itertools;
860 use itertools::Itertools;
806
861
807 #[test]
862 #[test]
808 fn test_empty() {
863 fn test_empty() {
809 let temp = tempfile::tempdir().unwrap();
864 let temp = tempfile::tempdir().unwrap();
810 let vfs = Vfs { base: temp.path() };
865 let vfs = Vfs { base: temp.path() };
811 std::fs::write(temp.path().join("foo.i"), b"").unwrap();
866 std::fs::write(temp.path().join("foo.i"), b"").unwrap();
812 let revlog = Revlog::open(&vfs, "foo.i", None, false).unwrap();
867 let revlog =
868 Revlog::open(&vfs, "foo.i", None, RevlogOpenOptions::new())
869 .unwrap();
813 assert!(revlog.is_empty());
870 assert!(revlog.is_empty());
814 assert_eq!(revlog.len(), 0);
871 assert_eq!(revlog.len(), 0);
815 assert!(revlog.get_entry(0.into()).is_err());
872 assert!(revlog.get_entry(0.into()).is_err());
816 assert!(!revlog.has_rev(0.into()));
873 assert!(!revlog.has_rev(0.into()));
817 assert_eq!(
874 assert_eq!(
818 revlog.rev_from_node(NULL_NODE.into()).unwrap(),
875 revlog.rev_from_node(NULL_NODE.into()).unwrap(),
819 NULL_REVISION
876 NULL_REVISION
820 );
877 );
821 let null_entry = revlog.get_entry(NULL_REVISION.into()).ok().unwrap();
878 let null_entry = revlog.get_entry(NULL_REVISION.into()).ok().unwrap();
822 assert_eq!(null_entry.revision(), NULL_REVISION);
879 assert_eq!(null_entry.revision(), NULL_REVISION);
823 assert!(null_entry.data().unwrap().is_empty());
880 assert!(null_entry.data().unwrap().is_empty());
824 }
881 }
825
882
826 #[test]
883 #[test]
827 fn test_inline() {
884 fn test_inline() {
828 let temp = tempfile::tempdir().unwrap();
885 let temp = tempfile::tempdir().unwrap();
829 let vfs = Vfs { base: temp.path() };
886 let vfs = Vfs { base: temp.path() };
830 let node0 = Node::from_hex("2ed2a3912a0b24502043eae84ee4b279c18b90dd")
887 let node0 = Node::from_hex("2ed2a3912a0b24502043eae84ee4b279c18b90dd")
831 .unwrap();
888 .unwrap();
832 let node1 = Node::from_hex("b004912a8510032a0350a74daa2803dadfb00e12")
889 let node1 = Node::from_hex("b004912a8510032a0350a74daa2803dadfb00e12")
833 .unwrap();
890 .unwrap();
834 let node2 = Node::from_hex("dd6ad206e907be60927b5a3117b97dffb2590582")
891 let node2 = Node::from_hex("dd6ad206e907be60927b5a3117b97dffb2590582")
835 .unwrap();
892 .unwrap();
836 let entry0_bytes = IndexEntryBuilder::new()
893 let entry0_bytes = IndexEntryBuilder::new()
837 .is_first(true)
894 .is_first(true)
838 .with_version(1)
895 .with_version(1)
839 .with_inline(true)
896 .with_inline(true)
840 .with_offset(INDEX_ENTRY_SIZE)
897 .with_offset(INDEX_ENTRY_SIZE)
841 .with_node(node0)
898 .with_node(node0)
842 .build();
899 .build();
843 let entry1_bytes = IndexEntryBuilder::new()
900 let entry1_bytes = IndexEntryBuilder::new()
844 .with_offset(INDEX_ENTRY_SIZE)
901 .with_offset(INDEX_ENTRY_SIZE)
845 .with_node(node1)
902 .with_node(node1)
846 .build();
903 .build();
847 let entry2_bytes = IndexEntryBuilder::new()
904 let entry2_bytes = IndexEntryBuilder::new()
848 .with_offset(INDEX_ENTRY_SIZE)
905 .with_offset(INDEX_ENTRY_SIZE)
849 .with_p1(Revision(0))
906 .with_p1(Revision(0))
850 .with_p2(Revision(1))
907 .with_p2(Revision(1))
851 .with_node(node2)
908 .with_node(node2)
852 .build();
909 .build();
853 let contents = vec![entry0_bytes, entry1_bytes, entry2_bytes]
910 let contents = vec![entry0_bytes, entry1_bytes, entry2_bytes]
854 .into_iter()
911 .into_iter()
855 .flatten()
912 .flatten()
856 .collect_vec();
913 .collect_vec();
857 std::fs::write(temp.path().join("foo.i"), contents).unwrap();
914 std::fs::write(temp.path().join("foo.i"), contents).unwrap();
858 let revlog = Revlog::open(&vfs, "foo.i", None, false).unwrap();
915 let revlog =
916 Revlog::open(&vfs, "foo.i", None, RevlogOpenOptions::new())
917 .unwrap();
859
918
860 let entry0 = revlog.get_entry(0.into()).ok().unwrap();
919 let entry0 = revlog.get_entry(0.into()).ok().unwrap();
861 assert_eq!(entry0.revision(), Revision(0));
920 assert_eq!(entry0.revision(), Revision(0));
862 assert_eq!(*entry0.node(), node0);
921 assert_eq!(*entry0.node(), node0);
863 assert!(!entry0.has_p1());
922 assert!(!entry0.has_p1());
864 assert_eq!(entry0.p1(), None);
923 assert_eq!(entry0.p1(), None);
865 assert_eq!(entry0.p2(), None);
924 assert_eq!(entry0.p2(), None);
866 let p1_entry = entry0.p1_entry().unwrap();
925 let p1_entry = entry0.p1_entry().unwrap();
867 assert!(p1_entry.is_none());
926 assert!(p1_entry.is_none());
868 let p2_entry = entry0.p2_entry().unwrap();
927 let p2_entry = entry0.p2_entry().unwrap();
869 assert!(p2_entry.is_none());
928 assert!(p2_entry.is_none());
870
929
871 let entry1 = revlog.get_entry(1.into()).ok().unwrap();
930 let entry1 = revlog.get_entry(1.into()).ok().unwrap();
872 assert_eq!(entry1.revision(), Revision(1));
931 assert_eq!(entry1.revision(), Revision(1));
873 assert_eq!(*entry1.node(), node1);
932 assert_eq!(*entry1.node(), node1);
874 assert!(!entry1.has_p1());
933 assert!(!entry1.has_p1());
875 assert_eq!(entry1.p1(), None);
934 assert_eq!(entry1.p1(), None);
876 assert_eq!(entry1.p2(), None);
935 assert_eq!(entry1.p2(), None);
877 let p1_entry = entry1.p1_entry().unwrap();
936 let p1_entry = entry1.p1_entry().unwrap();
878 assert!(p1_entry.is_none());
937 assert!(p1_entry.is_none());
879 let p2_entry = entry1.p2_entry().unwrap();
938 let p2_entry = entry1.p2_entry().unwrap();
880 assert!(p2_entry.is_none());
939 assert!(p2_entry.is_none());
881
940
882 let entry2 = revlog.get_entry(2.into()).ok().unwrap();
941 let entry2 = revlog.get_entry(2.into()).ok().unwrap();
883 assert_eq!(entry2.revision(), Revision(2));
942 assert_eq!(entry2.revision(), Revision(2));
884 assert_eq!(*entry2.node(), node2);
943 assert_eq!(*entry2.node(), node2);
885 assert!(entry2.has_p1());
944 assert!(entry2.has_p1());
886 assert_eq!(entry2.p1(), Some(Revision(0)));
945 assert_eq!(entry2.p1(), Some(Revision(0)));
887 assert_eq!(entry2.p2(), Some(Revision(1)));
946 assert_eq!(entry2.p2(), Some(Revision(1)));
888 let p1_entry = entry2.p1_entry().unwrap();
947 let p1_entry = entry2.p1_entry().unwrap();
889 assert!(p1_entry.is_some());
948 assert!(p1_entry.is_some());
890 assert_eq!(p1_entry.unwrap().revision(), Revision(0));
949 assert_eq!(p1_entry.unwrap().revision(), Revision(0));
891 let p2_entry = entry2.p2_entry().unwrap();
950 let p2_entry = entry2.p2_entry().unwrap();
892 assert!(p2_entry.is_some());
951 assert!(p2_entry.is_some());
893 assert_eq!(p2_entry.unwrap().revision(), Revision(1));
952 assert_eq!(p2_entry.unwrap().revision(), Revision(1));
894 }
953 }
895
954
896 #[test]
955 #[test]
897 fn test_nodemap() {
956 fn test_nodemap() {
898 let temp = tempfile::tempdir().unwrap();
957 let temp = tempfile::tempdir().unwrap();
899 let vfs = Vfs { base: temp.path() };
958 let vfs = Vfs { base: temp.path() };
900
959
901 // building a revlog with a forced Node starting with zeros
960 // building a revlog with a forced Node starting with zeros
902 // This is a corruption, but it does not preclude using the nodemap
961 // This is a corruption, but it does not preclude using the nodemap
903 // if we don't try to access the data
962 // if we don't try to access the data
904 let node0 = Node::from_hex("00d2a3912a0b24502043eae84ee4b279c18b90dd")
963 let node0 = Node::from_hex("00d2a3912a0b24502043eae84ee4b279c18b90dd")
905 .unwrap();
964 .unwrap();
906 let node1 = Node::from_hex("b004912a8510032a0350a74daa2803dadfb00e12")
965 let node1 = Node::from_hex("b004912a8510032a0350a74daa2803dadfb00e12")
907 .unwrap();
966 .unwrap();
908 let entry0_bytes = IndexEntryBuilder::new()
967 let entry0_bytes = IndexEntryBuilder::new()
909 .is_first(true)
968 .is_first(true)
910 .with_version(1)
969 .with_version(1)
911 .with_inline(true)
970 .with_inline(true)
912 .with_offset(INDEX_ENTRY_SIZE)
971 .with_offset(INDEX_ENTRY_SIZE)
913 .with_node(node0)
972 .with_node(node0)
914 .build();
973 .build();
915 let entry1_bytes = IndexEntryBuilder::new()
974 let entry1_bytes = IndexEntryBuilder::new()
916 .with_offset(INDEX_ENTRY_SIZE)
975 .with_offset(INDEX_ENTRY_SIZE)
917 .with_node(node1)
976 .with_node(node1)
918 .build();
977 .build();
919 let contents = vec![entry0_bytes, entry1_bytes]
978 let contents = vec![entry0_bytes, entry1_bytes]
920 .into_iter()
979 .into_iter()
921 .flatten()
980 .flatten()
922 .collect_vec();
981 .collect_vec();
923 std::fs::write(temp.path().join("foo.i"), contents).unwrap();
982 std::fs::write(temp.path().join("foo.i"), contents).unwrap();
924
983
925 let mut idx = nodemap::tests::TestNtIndex::new();
984 let mut idx = nodemap::tests::TestNtIndex::new();
926 idx.insert_node(Revision(0), node0).unwrap();
985 idx.insert_node(Revision(0), node0).unwrap();
927 idx.insert_node(Revision(1), node1).unwrap();
986 idx.insert_node(Revision(1), node1).unwrap();
928
987
929 let revlog =
988 let revlog = Revlog::open_gen(
930 Revlog::open_gen(&vfs, "foo.i", None, true, Some(idx.nt)).unwrap();
989 &vfs,
990 "foo.i",
991 None,
992 RevlogOpenOptions::new(),
993 Some(idx.nt),
994 )
995 .unwrap();
931
996
932 // accessing the data shows the corruption
997 // accessing the data shows the corruption
933 revlog.get_entry(0.into()).unwrap().data().unwrap_err();
998 revlog.get_entry(0.into()).unwrap().data().unwrap_err();
934
999
935 assert_eq!(
1000 assert_eq!(
936 revlog.rev_from_node(NULL_NODE.into()).unwrap(),
1001 revlog.rev_from_node(NULL_NODE.into()).unwrap(),
937 Revision(-1)
1002 Revision(-1)
938 );
1003 );
939 assert_eq!(revlog.rev_from_node(node0.into()).unwrap(), Revision(0));
1004 assert_eq!(revlog.rev_from_node(node0.into()).unwrap(), Revision(0));
940 assert_eq!(revlog.rev_from_node(node1.into()).unwrap(), Revision(1));
1005 assert_eq!(revlog.rev_from_node(node1.into()).unwrap(), Revision(1));
941 assert_eq!(
1006 assert_eq!(
942 revlog
1007 revlog
943 .rev_from_node(NodePrefix::from_hex("000").unwrap())
1008 .rev_from_node(NodePrefix::from_hex("000").unwrap())
944 .unwrap(),
1009 .unwrap(),
945 Revision(-1)
1010 Revision(-1)
946 );
1011 );
947 assert_eq!(
1012 assert_eq!(
948 revlog
1013 revlog
949 .rev_from_node(NodePrefix::from_hex("b00").unwrap())
1014 .rev_from_node(NodePrefix::from_hex("b00").unwrap())
950 .unwrap(),
1015 .unwrap(),
951 Revision(1)
1016 Revision(1)
952 );
1017 );
953 // RevlogError does not implement PartialEq
1018 // RevlogError does not implement PartialEq
954 // (ultimately because io::Error does not)
1019 // (ultimately because io::Error does not)
955 match revlog
1020 match revlog
956 .rev_from_node(NodePrefix::from_hex("00").unwrap())
1021 .rev_from_node(NodePrefix::from_hex("00").unwrap())
957 .expect_err("Expected to give AmbiguousPrefix error")
1022 .expect_err("Expected to give AmbiguousPrefix error")
958 {
1023 {
959 RevlogError::AmbiguousPrefix => (),
1024 RevlogError::AmbiguousPrefix => (),
960 e => {
1025 e => {
961 panic!("Got another error than AmbiguousPrefix: {:?}", e);
1026 panic!("Got another error than AmbiguousPrefix: {:?}", e);
962 }
1027 }
963 };
1028 };
964 }
1029 }
965 }
1030 }
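The test updates above are the visible edge of this patch's main change:
Revlog::open and Revlog::open_gen no longer take a bare bool but a
RevlogOpenOptions value derived from the repository's requirements and
config. The real struct is defined elsewhere in the patch; the following
is only a rough sketch of the pattern being introduced (the field name is
an assumption, chosen to mirror the boolean the tests used to pass
positionally):

    // Hypothetical stand-in for RevlogOpenOptions, for illustration only.
    #[derive(Debug, Clone, Copy)]
    struct OpenOptions {
        // Mirrors the removed positional bool.
        use_nodemap: bool,
    }

    impl OpenOptions {
        // Conservative defaults, matching how the updated tests call new().
        fn new() -> Self {
            Self { use_nodemap: false }
        }
    }

    fn main() {
        assert!(!OpenOptions::new().use_nodemap);
    }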
@@ -1,561 +1,572 b''
1 // revlog.rs
1 // revlog.rs
2 //
2 //
3 // Copyright 2019-2020 Georges Racinet <georges.racinet@octobus.net>
3 // Copyright 2019-2020 Georges Racinet <georges.racinet@octobus.net>
4 //
4 //
5 // This software may be used and distributed according to the terms of the
5 // This software may be used and distributed according to the terms of the
6 // GNU General Public License version 2 or any later version.
6 // GNU General Public License version 2 or any later version.
7
7
8 use crate::{
8 use crate::{
9 cindex,
9 cindex,
10 utils::{node_from_py_bytes, node_from_py_object},
10 utils::{node_from_py_bytes, node_from_py_object},
11 PyRevision,
11 PyRevision,
12 };
12 };
13 use cpython::{
13 use cpython::{
14 buffer::{Element, PyBuffer},
14 buffer::{Element, PyBuffer},
15 exc::{IndexError, ValueError},
15 exc::{IndexError, ValueError},
16 ObjectProtocol, PyBytes, PyClone, PyDict, PyErr, PyInt, PyModule,
16 ObjectProtocol, PyBytes, PyClone, PyDict, PyErr, PyInt, PyModule,
17 PyObject, PyResult, PyString, PyTuple, Python, PythonObject, ToPyObject,
17 PyObject, PyResult, PyString, PyTuple, Python, PythonObject, ToPyObject,
18 };
18 };
19 use hg::{
19 use hg::{
20 index::IndexHeader,
20 nodemap::{Block, NodeMapError, NodeTree},
21 nodemap::{Block, NodeMapError, NodeTree},
21 revlog::{nodemap::NodeMap, NodePrefix, RevlogIndex},
22 revlog::{nodemap::NodeMap, NodePrefix, RevlogIndex},
22 BaseRevision, Revision, UncheckedRevision,
23 BaseRevision, Revision, UncheckedRevision,
23 };
24 };
24 use std::cell::RefCell;
25 use std::cell::RefCell;
25
26
26 /// Return a struct implementing the Graph trait
27 /// Return a struct implementing the Graph trait
27 pub(crate) fn pyindex_to_graph(
28 pub(crate) fn pyindex_to_graph(
28 py: Python,
29 py: Python,
29 index: PyObject,
30 index: PyObject,
30 ) -> PyResult<cindex::Index> {
31 ) -> PyResult<cindex::Index> {
31 match index.extract::<MixedIndex>(py) {
32 match index.extract::<MixedIndex>(py) {
32 Ok(midx) => Ok(midx.clone_cindex(py)),
33 Ok(midx) => Ok(midx.clone_cindex(py)),
33 Err(_) => cindex::Index::new(py, index),
34 Err(_) => cindex::Index::new(py, index),
34 }
35 }
35 }
36 }
36
37
37 py_class!(pub class MixedIndex |py| {
38 py_class!(pub class MixedIndex |py| {
38 data cindex: RefCell<cindex::Index>;
39 data cindex: RefCell<cindex::Index>;
39 data index: RefCell<hg::index::Index>;
40 data index: RefCell<hg::index::Index>;
40 data nt: RefCell<Option<NodeTree>>;
41 data nt: RefCell<Option<NodeTree>>;
41 data docket: RefCell<Option<PyObject>>;
42 data docket: RefCell<Option<PyObject>>;
42 // Holds a reference to the mmap'ed persistent nodemap data
43 // Holds a reference to the mmap'ed persistent nodemap data
43 data nodemap_mmap: RefCell<Option<PyBuffer>>;
44 data nodemap_mmap: RefCell<Option<PyBuffer>>;
44 // Holds a reference to the mmap'ed persistent index data
45 // Holds a reference to the mmap'ed persistent index data
45 data index_mmap: RefCell<Option<PyBuffer>>;
46 data index_mmap: RefCell<Option<PyBuffer>>;
46
47
47 def __new__(
48 def __new__(
48 _cls,
49 _cls,
49 cindex: PyObject,
50 cindex: PyObject,
50 data: PyObject
51 data: PyObject,
52 default_header: u32,
51 ) -> PyResult<MixedIndex> {
53 ) -> PyResult<MixedIndex> {
52 Self::new(py, cindex, data)
54 Self::new(py, cindex, data, default_header)
53 }
55 }
54
56
55 /// Compatibility layer used for Python consumers needing access to the C index
57 /// Compatibility layer used for Python consumers needing access to the C index
56 ///
58 ///
57 /// Only use case so far is `scmutil.shortesthexnodeidprefix`,
59 /// Only use case so far is `scmutil.shortesthexnodeidprefix`,
58 /// that may need to build a custom `nodetree`, based on a specified revset.
60 /// that may need to build a custom `nodetree`, based on a specified revset.
59 /// With a Rust implementation of the nodemap, we will be able to get rid of
61 /// With a Rust implementation of the nodemap, we will be able to get rid of
60 /// this by exposing our own standalone nodemap class,
62 /// this by exposing our own standalone nodemap class,
61 /// ready to accept `MixedIndex`.
63 /// ready to accept `MixedIndex`.
62 def get_cindex(&self) -> PyResult<PyObject> {
64 def get_cindex(&self) -> PyResult<PyObject> {
63 Ok(self.cindex(py).borrow().inner().clone_ref(py))
65 Ok(self.cindex(py).borrow().inner().clone_ref(py))
64 }
66 }
65
67
66 // Index API involving nodemap, as defined in mercurial/pure/parsers.py
68 // Index API involving nodemap, as defined in mercurial/pure/parsers.py
67
69
68 /// Return Revision if found, raises a bare `error.RevlogError`
70 /// Return Revision if found, raises a bare `error.RevlogError`
69 /// in case of ambiguity, same as C version does
71 /// in case of ambiguity, same as C version does
70 def get_rev(&self, node: PyBytes) -> PyResult<Option<PyRevision>> {
72 def get_rev(&self, node: PyBytes) -> PyResult<Option<PyRevision>> {
71 let opt = self.get_nodetree(py)?.borrow();
73 let opt = self.get_nodetree(py)?.borrow();
72 let nt = opt.as_ref().unwrap();
74 let nt = opt.as_ref().unwrap();
73 let idx = &*self.cindex(py).borrow();
75 let idx = &*self.cindex(py).borrow();
74 let node = node_from_py_bytes(py, &node)?;
76 let node = node_from_py_bytes(py, &node)?;
75 let res = nt.find_bin(idx, node.into());
77 let res = nt.find_bin(idx, node.into());
76 Ok(res.map_err(|e| nodemap_error(py, e))?.map(Into::into))
78 Ok(res.map_err(|e| nodemap_error(py, e))?.map(Into::into))
77 }
79 }
78
80
79 /// same as `get_rev()` but raises a bare `error.RevlogError` if the
81 /// same as `get_rev()` but raises a bare `error.RevlogError` if the
80 /// node is not found.
82 /// node is not found.
81 ///
83 ///
82 /// No need to repeat `node` in the exception; `mercurial/revlog.py`
84 /// No need to repeat `node` in the exception; `mercurial/revlog.py`
83 /// will catch and rewrap with it
85 /// will catch and rewrap with it
84 def rev(&self, node: PyBytes) -> PyResult<PyRevision> {
86 def rev(&self, node: PyBytes) -> PyResult<PyRevision> {
85 self.get_rev(py, node)?.ok_or_else(|| revlog_error(py))
87 self.get_rev(py, node)?.ok_or_else(|| revlog_error(py))
86 }
88 }
87
89
88 /// return True if the node exists in the index
90 /// return True if the node exists in the index
89 def has_node(&self, node: PyBytes) -> PyResult<bool> {
91 def has_node(&self, node: PyBytes) -> PyResult<bool> {
90 self.get_rev(py, node).map(|opt| opt.is_some())
92 self.get_rev(py, node).map(|opt| opt.is_some())
91 }
93 }
92
94
93 /// find the length of the shortest hex nodeid of a binary ID
95 /// find the length of the shortest hex nodeid of a binary ID
94 def shortest(&self, node: PyBytes) -> PyResult<usize> {
96 def shortest(&self, node: PyBytes) -> PyResult<usize> {
95 let opt = self.get_nodetree(py)?.borrow();
97 let opt = self.get_nodetree(py)?.borrow();
96 let nt = opt.as_ref().unwrap();
98 let nt = opt.as_ref().unwrap();
97 let idx = &*self.cindex(py).borrow();
99 let idx = &*self.cindex(py).borrow();
98 match nt.unique_prefix_len_node(idx, &node_from_py_bytes(py, &node)?)
100 match nt.unique_prefix_len_node(idx, &node_from_py_bytes(py, &node)?)
99 {
101 {
100 Ok(Some(l)) => Ok(l),
102 Ok(Some(l)) => Ok(l),
101 Ok(None) => Err(revlog_error(py)),
103 Ok(None) => Err(revlog_error(py)),
102 Err(e) => Err(nodemap_error(py, e)),
104 Err(e) => Err(nodemap_error(py, e)),
103 }
105 }
104 }
106 }
105
107
106 def partialmatch(&self, node: PyObject) -> PyResult<Option<PyBytes>> {
108 def partialmatch(&self, node: PyObject) -> PyResult<Option<PyBytes>> {
107 let opt = self.get_nodetree(py)?.borrow();
109 let opt = self.get_nodetree(py)?.borrow();
108 let nt = opt.as_ref().unwrap();
110 let nt = opt.as_ref().unwrap();
109 let idx = &*self.cindex(py).borrow();
111 let idx = &*self.cindex(py).borrow();
110
112
111 let node_as_string = if cfg!(feature = "python3-sys") {
113 let node_as_string = if cfg!(feature = "python3-sys") {
112 node.cast_as::<PyString>(py)?.to_string(py)?.to_string()
114 node.cast_as::<PyString>(py)?.to_string(py)?.to_string()
113 }
115 }
114 else {
116 else {
115 let node = node.extract::<PyBytes>(py)?;
117 let node = node.extract::<PyBytes>(py)?;
116 String::from_utf8_lossy(node.data(py)).to_string()
118 String::from_utf8_lossy(node.data(py)).to_string()
117 };
119 };
118
120
119 let prefix = NodePrefix::from_hex(&node_as_string)
121 let prefix = NodePrefix::from_hex(&node_as_string)
120 .map_err(|_| PyErr::new::<ValueError, _>(
122 .map_err(|_| PyErr::new::<ValueError, _>(
121 py, format!("Invalid node or prefix '{}'", node_as_string))
123 py, format!("Invalid node or prefix '{}'", node_as_string))
122 )?;
124 )?;
123
125
124 nt.find_bin(idx, prefix)
126 nt.find_bin(idx, prefix)
125 // TODO make an inner API returning the node directly
127 // TODO make an inner API returning the node directly
126 .map(|opt| opt.map(
128 .map(|opt| opt.map(
127 |rev| PyBytes::new(py, idx.node(rev).unwrap().as_bytes())))
129 |rev| PyBytes::new(py, idx.node(rev).unwrap().as_bytes())))
128 .map_err(|e| nodemap_error(py, e))
130 .map_err(|e| nodemap_error(py, e))
129
131
130 }
132 }
131
133
132 /// append an index entry
134 /// append an index entry
133 def append(&self, tup: PyTuple) -> PyResult<PyObject> {
135 def append(&self, tup: PyTuple) -> PyResult<PyObject> {
134 if tup.len(py) < 8 {
136 if tup.len(py) < 8 {
135 // this is better than the panic promised by tup.get_item()
137 // this is better than the panic promised by tup.get_item()
136 return Err(
138 return Err(
137 PyErr::new::<IndexError, _>(py, "tuple index out of range"))
139 PyErr::new::<IndexError, _>(py, "tuple index out of range"))
138 }
140 }
139 let node_bytes = tup.get_item(py, 7).extract(py)?;
141 let node_bytes = tup.get_item(py, 7).extract(py)?;
140 let node = node_from_py_object(py, &node_bytes)?;
142 let node = node_from_py_object(py, &node_bytes)?;
141
143
142 let mut idx = self.cindex(py).borrow_mut();
144 let mut idx = self.cindex(py).borrow_mut();
143
145
144 // This is ok since we will just add the revision to the index
146 // This is ok since we will just add the revision to the index
145 let rev = Revision(idx.len() as BaseRevision);
147 let rev = Revision(idx.len() as BaseRevision);
146 idx.append(py, tup)?;
148 idx.append(py, tup)?;
147
149
148 self.get_nodetree(py)?.borrow_mut().as_mut().unwrap()
150 self.get_nodetree(py)?.borrow_mut().as_mut().unwrap()
149 .insert(&*idx, &node, rev)
151 .insert(&*idx, &node, rev)
150 .map_err(|e| nodemap_error(py, e))?;
152 .map_err(|e| nodemap_error(py, e))?;
151 Ok(py.None())
153 Ok(py.None())
152 }
154 }
153
155
154 def __delitem__(&self, key: PyObject) -> PyResult<()> {
156 def __delitem__(&self, key: PyObject) -> PyResult<()> {
155 // __delitem__ is both for `del idx[r]` and `del idx[r1:r2]`
157 // __delitem__ is both for `del idx[r]` and `del idx[r1:r2]`
156 self.cindex(py).borrow().inner().del_item(py, key)?;
158 self.cindex(py).borrow().inner().del_item(py, key)?;
157 let mut opt = self.get_nodetree(py)?.borrow_mut();
159 let mut opt = self.get_nodetree(py)?.borrow_mut();
158 let nt = opt.as_mut().unwrap();
160 let nt = opt.as_mut().unwrap();
159 nt.invalidate_all();
161 nt.invalidate_all();
160 self.fill_nodemap(py, nt)?;
162 self.fill_nodemap(py, nt)?;
161 Ok(())
163 Ok(())
162 }
164 }
163
165
164 //
166 //
165 // Reforwarded C index API
167 // Reforwarded C index API
166 //
168 //
167
169
168 // index_methods (tp_methods). Same ordering as in revlog.c
170 // index_methods (tp_methods). Same ordering as in revlog.c
169
171
170 /// return the gca set of the given revs
172 /// return the gca set of the given revs
171 def ancestors(&self, *args, **kw) -> PyResult<PyObject> {
173 def ancestors(&self, *args, **kw) -> PyResult<PyObject> {
172 self.call_cindex(py, "ancestors", args, kw)
174 self.call_cindex(py, "ancestors", args, kw)
173 }
175 }
174
176
175 /// return the heads of the common ancestors of the given revs
177 /// return the heads of the common ancestors of the given revs
176 def commonancestorsheads(&self, *args, **kw) -> PyResult<PyObject> {
178 def commonancestorsheads(&self, *args, **kw) -> PyResult<PyObject> {
177 self.call_cindex(py, "commonancestorsheads", args, kw)
179 self.call_cindex(py, "commonancestorsheads", args, kw)
178 }
180 }
179
181
180 /// Clear the index caches and inner py_class data.
182 /// Clear the index caches and inner py_class data.
181 /// It is Python's responsibility to call `update_nodemap_data` again.
183 /// It is Python's responsibility to call `update_nodemap_data` again.
182 def clearcaches(&self, *args, **kw) -> PyResult<PyObject> {
184 def clearcaches(&self, *args, **kw) -> PyResult<PyObject> {
183 self.nt(py).borrow_mut().take();
185 self.nt(py).borrow_mut().take();
184 self.docket(py).borrow_mut().take();
186 self.docket(py).borrow_mut().take();
185 self.nodemap_mmap(py).borrow_mut().take();
187 self.nodemap_mmap(py).borrow_mut().take();
186 self.call_cindex(py, "clearcaches", args, kw)
188 self.call_cindex(py, "clearcaches", args, kw)
187 }
189 }
188
190
189 /// return the raw binary string representing a revision
191 /// return the raw binary string representing a revision
190 def entry_binary(&self, *args, **kw) -> PyResult<PyObject> {
192 def entry_binary(&self, *args, **kw) -> PyResult<PyObject> {
191 self.call_cindex(py, "entry_binary", args, kw)
193 self.call_cindex(py, "entry_binary", args, kw)
192 }
194 }
193
195
194 /// return a binary packed version of the header
196 /// return a binary packed version of the header
195 def pack_header(&self, *args, **kw) -> PyResult<PyObject> {
197 def pack_header(&self, *args, **kw) -> PyResult<PyObject> {
196 self.call_cindex(py, "pack_header", args, kw)
198 self.call_cindex(py, "pack_header", args, kw)
197 }
199 }
198
200
199 /// get an index entry
201 /// get an index entry
200 def get(&self, *args, **kw) -> PyResult<PyObject> {
202 def get(&self, *args, **kw) -> PyResult<PyObject> {
201 self.call_cindex(py, "get", args, kw)
203 self.call_cindex(py, "get", args, kw)
202 }
204 }
203
205
204 /// compute phases
206 /// compute phases
205 def computephasesmapsets(&self, *args, **kw) -> PyResult<PyObject> {
207 def computephasesmapsets(&self, *args, **kw) -> PyResult<PyObject> {
206 self.call_cindex(py, "computephasesmapsets", args, kw)
208 self.call_cindex(py, "computephasesmapsets", args, kw)
207 }
209 }
208
210
209 /// reachableroots
211 /// reachableroots
210 def reachableroots2(&self, *args, **kw) -> PyResult<PyObject> {
212 def reachableroots2(&self, *args, **kw) -> PyResult<PyObject> {
211 self.call_cindex(py, "reachableroots2", args, kw)
213 self.call_cindex(py, "reachableroots2", args, kw)
212 }
214 }
213
215
214 /// get head revisions
216 /// get head revisions
215 def headrevs(&self, *args, **kw) -> PyResult<PyObject> {
217 def headrevs(&self, *args, **kw) -> PyResult<PyObject> {
216 self.call_cindex(py, "headrevs", args, kw)
218 self.call_cindex(py, "headrevs", args, kw)
217 }
219 }
218
220
219 /// get filtered head revisions
221 /// get filtered head revisions
220 def headrevsfiltered(&self, *args, **kw) -> PyResult<PyObject> {
222 def headrevsfiltered(&self, *args, **kw) -> PyResult<PyObject> {
221 self.call_cindex(py, "headrevsfiltered", args, kw)
223 self.call_cindex(py, "headrevsfiltered", args, kw)
222 }
224 }
223
225
224 /// True if the object is a snapshot
226 /// True if the object is a snapshot
225 def issnapshot(&self, *args, **kw) -> PyResult<PyObject> {
227 def issnapshot(&self, *args, **kw) -> PyResult<PyObject> {
226 self.call_cindex(py, "issnapshot", args, kw)
228 self.call_cindex(py, "issnapshot", args, kw)
227 }
229 }
228
230
229 /// Gather snapshot data in a cache dict
231 /// Gather snapshot data in a cache dict
230 def findsnapshots(&self, *args, **kw) -> PyResult<PyObject> {
232 def findsnapshots(&self, *args, **kw) -> PyResult<PyObject> {
231 self.call_cindex(py, "findsnapshots", args, kw)
233 self.call_cindex(py, "findsnapshots", args, kw)
232 }
234 }
233
235
234 /// determine revisions with deltas to reconstruct fulltext
236 /// determine revisions with deltas to reconstruct fulltext
235 def deltachain(&self, *args, **kw) -> PyResult<PyObject> {
237 def deltachain(&self, *args, **kw) -> PyResult<PyObject> {
236 self.call_cindex(py, "deltachain", args, kw)
238 self.call_cindex(py, "deltachain", args, kw)
237 }
239 }
238
240
239 /// slice planned chunk read to reach a density threshold
241 /// slice planned chunk read to reach a density threshold
240 def slicechunktodensity(&self, *args, **kw) -> PyResult<PyObject> {
242 def slicechunktodensity(&self, *args, **kw) -> PyResult<PyObject> {
241 self.call_cindex(py, "slicechunktodensity", args, kw)
243 self.call_cindex(py, "slicechunktodensity", args, kw)
242 }
244 }
243
245
244 /// stats for the index
246 /// stats for the index
245 def stats(&self, *args, **kw) -> PyResult<PyObject> {
247 def stats(&self, *args, **kw) -> PyResult<PyObject> {
246 self.call_cindex(py, "stats", args, kw)
248 self.call_cindex(py, "stats", args, kw)
247 }
249 }
248
250
249 // index_sequence_methods and index_mapping_methods.
251 // index_sequence_methods and index_mapping_methods.
250 //
252 //
251 // Since we call back through the high level Python API,
253 // Since we call back through the high level Python API,
252 // there's no point making a distinction between index_get
254 // there's no point making a distinction between index_get
253 // and index_getitem.
255 // and index_getitem.
254
256
255 def __len__(&self) -> PyResult<usize> {
257 def __len__(&self) -> PyResult<usize> {
256 self.cindex(py).borrow().inner().len(py)
258 self.cindex(py).borrow().inner().len(py)
257 }
259 }
258
260
259 def __getitem__(&self, key: PyObject) -> PyResult<PyObject> {
261 def __getitem__(&self, key: PyObject) -> PyResult<PyObject> {
260 // this conversion seems needless, but that's actually because
262 // this conversion seems needless, but that's actually because
261 // `index_getitem` does not handle conversion from PyLong,
263 // `index_getitem` does not handle conversion from PyLong,
262 // which expressions such as [e for e in index] internally use.
264 // which expressions such as [e for e in index] internally use.
263 // Note that we don't seem to have a direct way to call
265 // Note that we don't seem to have a direct way to call
264 // PySequence_GetItem (does the job), which would possibly be better
266 // PySequence_GetItem (does the job), which would possibly be better
265 // for performance
267 // for performance
266 let key = match key.extract::<i32>(py) {
268 let key = match key.extract::<i32>(py) {
267 Ok(rev) => rev.to_py_object(py).into_object(),
269 Ok(rev) => rev.to_py_object(py).into_object(),
268 Err(_) => key,
270 Err(_) => key,
269 };
271 };
270 self.cindex(py).borrow().inner().get_item(py, key)
272 self.cindex(py).borrow().inner().get_item(py, key)
271 }
273 }
272
274
273 def __setitem__(&self, key: PyObject, value: PyObject) -> PyResult<()> {
275 def __setitem__(&self, key: PyObject, value: PyObject) -> PyResult<()> {
274 self.cindex(py).borrow().inner().set_item(py, key, value)
276 self.cindex(py).borrow().inner().set_item(py, key, value)
275 }
277 }
276
278
277 def __contains__(&self, item: PyObject) -> PyResult<bool> {
279 def __contains__(&self, item: PyObject) -> PyResult<bool> {
278 // ObjectProtocol does not seem to provide contains(), so
280 // ObjectProtocol does not seem to provide contains(), so
279 // this is an equivalent implementation of the index_contains()
281 // this is an equivalent implementation of the index_contains()
280 // defined in revlog.c
282 // defined in revlog.c
281 let cindex = self.cindex(py).borrow();
283 let cindex = self.cindex(py).borrow();
282 match item.extract::<i32>(py) {
284 match item.extract::<i32>(py) {
283 Ok(rev) => {
285 Ok(rev) => {
284 Ok(rev >= -1 && rev < cindex.inner().len(py)? as BaseRevision)
286 Ok(rev >= -1 && rev < cindex.inner().len(py)? as BaseRevision)
285 }
287 }
286 Err(_) => {
288 Err(_) => {
287 cindex.inner().call_method(
289 cindex.inner().call_method(
288 py,
290 py,
289 "has_node",
291 "has_node",
290 PyTuple::new(py, &[item]),
292 PyTuple::new(py, &[item]),
291 None)?
293 None)?
292 .extract(py)
294 .extract(py)
293 }
295 }
294 }
296 }
295 }
297 }
296
298
297 def nodemap_data_all(&self) -> PyResult<PyBytes> {
299 def nodemap_data_all(&self) -> PyResult<PyBytes> {
298 self.inner_nodemap_data_all(py)
300 self.inner_nodemap_data_all(py)
299 }
301 }
300
302
301 def nodemap_data_incremental(&self) -> PyResult<PyObject> {
303 def nodemap_data_incremental(&self) -> PyResult<PyObject> {
302 self.inner_nodemap_data_incremental(py)
304 self.inner_nodemap_data_incremental(py)
303 }
305 }
304 def update_nodemap_data(
306 def update_nodemap_data(
305 &self,
307 &self,
306 docket: PyObject,
308 docket: PyObject,
307 nm_data: PyObject
309 nm_data: PyObject
308 ) -> PyResult<PyObject> {
310 ) -> PyResult<PyObject> {
309 self.inner_update_nodemap_data(py, docket, nm_data)
311 self.inner_update_nodemap_data(py, docket, nm_data)
310 }
312 }
311
313
312 @property
314 @property
313 def entry_size(&self) -> PyResult<PyInt> {
315 def entry_size(&self) -> PyResult<PyInt> {
314 self.cindex(py).borrow().inner().getattr(py, "entry_size")?.extract::<PyInt>(py)
316 self.cindex(py).borrow().inner().getattr(py, "entry_size")?.extract::<PyInt>(py)
315 }
317 }
316
318
317 @property
319 @property
318 def rust_ext_compat(&self) -> PyResult<PyInt> {
320 def rust_ext_compat(&self) -> PyResult<PyInt> {
319 self.cindex(py).borrow().inner().getattr(py, "rust_ext_compat")?.extract::<PyInt>(py)
321 self.cindex(py).borrow().inner().getattr(py, "rust_ext_compat")?.extract::<PyInt>(py)
320 }
322 }
321
323
322 });
324 });
323
325
324 /// Take a (potentially) mmap'ed buffer, and return the underlying Python
326 /// Take a (potentially) mmap'ed buffer, and return the underlying Python
325 /// buffer along with the Rust slice into said buffer. We need to keep the
327 /// buffer along with the Rust slice into said buffer. We need to keep the
326 /// Python buffer around, otherwise we'd get a dangling pointer once the buffer
328 /// Python buffer around, otherwise we'd get a dangling pointer once the buffer
327 /// is freed from Python's side.
329 /// is freed from Python's side.
328 ///
330 ///
329 /// # Safety
331 /// # Safety
330 ///
332 ///
331 /// The caller must make sure that the buffer is kept around for at least as
333 /// The caller must make sure that the buffer is kept around for at least as
332 /// long as the slice.
334 /// long as the slice.
333 #[deny(unsafe_op_in_unsafe_fn)]
335 #[deny(unsafe_op_in_unsafe_fn)]
334 unsafe fn mmap_keeparound(
336 unsafe fn mmap_keeparound(
335 py: Python,
337 py: Python,
336 data: PyObject,
338 data: PyObject,
337 ) -> PyResult<(
339 ) -> PyResult<(
338 PyBuffer,
340 PyBuffer,
339 Box<dyn std::ops::Deref<Target = [u8]> + Send + 'static>,
341 Box<dyn std::ops::Deref<Target = [u8]> + Send + 'static>,
340 )> {
342 )> {
341 let buf = PyBuffer::get(py, &data)?;
343 let buf = PyBuffer::get(py, &data)?;
342 let len = buf.item_count();
344 let len = buf.item_count();
343
345
344 // Build a slice from the mmap'ed buffer data
346 // Build a slice from the mmap'ed buffer data
345 let cbuf = buf.buf_ptr();
347 let cbuf = buf.buf_ptr();
346 let bytes = if std::mem::size_of::<u8>() == buf.item_size()
348 let bytes = if std::mem::size_of::<u8>() == buf.item_size()
347 && buf.is_c_contiguous()
349 && buf.is_c_contiguous()
348 && u8::is_compatible_format(buf.format())
350 && u8::is_compatible_format(buf.format())
349 {
351 {
350 unsafe { std::slice::from_raw_parts(cbuf as *const u8, len) }
352 unsafe { std::slice::from_raw_parts(cbuf as *const u8, len) }
351 } else {
353 } else {
352 return Err(PyErr::new::<ValueError, _>(
354 return Err(PyErr::new::<ValueError, _>(
353 py,
355 py,
354 "Nodemap data buffer has an invalid memory representation"
356 "Nodemap data buffer has an invalid memory representation"
355 .to_string(),
357 .to_string(),
356 ));
358 ));
357 };
359 };
358
360
359 Ok((buf, Box::new(bytes)))
361 Ok((buf, Box::new(bytes)))
360 }
362 }
361
363
362 impl MixedIndex {
364 impl MixedIndex {
363 fn new(
365 fn new(
364 py: Python,
366 py: Python,
365 cindex: PyObject,
367 cindex: PyObject,
366 data: PyObject,
368 data: PyObject,
369 header: u32,
367 ) -> PyResult<MixedIndex> {
370 ) -> PyResult<MixedIndex> {
368 // Safety: we keep the buffer around inside the class as `index_mmap`
371 // Safety: we keep the buffer around inside the class as `index_mmap`
369 let (buf, bytes) = unsafe { mmap_keeparound(py, data)? };
372 let (buf, bytes) = unsafe { mmap_keeparound(py, data)? };
370
373
371 Self::create_instance(
374 Self::create_instance(
372 py,
375 py,
373 RefCell::new(cindex::Index::new(py, cindex)?),
376 RefCell::new(cindex::Index::new(py, cindex)?),
374 RefCell::new(hg::index::Index::new(bytes).unwrap()),
377 RefCell::new(
378 hg::index::Index::new(
379 bytes,
380 IndexHeader::parse(&header.to_be_bytes())
381 .expect("default header is broken")
382 .unwrap(),
383 )
384 .unwrap(),
385 ),
375 RefCell::new(None),
386 RefCell::new(None),
376 RefCell::new(None),
387 RefCell::new(None),
377 RefCell::new(None),
388 RefCell::new(None),
378 RefCell::new(Some(buf)),
389 RefCell::new(Some(buf)),
379 )
390 )
380 }
391 }
381
392
382 /// This is scaffolding at this point, but it could also become
393 /// This is scaffolding at this point, but it could also become
383 /// a way to start a persistent nodemap or perform a
394 /// a way to start a persistent nodemap or perform a
384 /// vacuum / repack operation
395 /// vacuum / repack operation
385 fn fill_nodemap(
396 fn fill_nodemap(
386 &self,
397 &self,
387 py: Python,
398 py: Python,
388 nt: &mut NodeTree,
399 nt: &mut NodeTree,
389 ) -> PyResult<PyObject> {
400 ) -> PyResult<PyObject> {
390 let index = self.cindex(py).borrow();
401 let index = self.cindex(py).borrow();
391 for r in 0..index.len() {
402 for r in 0..index.len() {
392 let rev = Revision(r as BaseRevision);
403 let rev = Revision(r as BaseRevision);
393 // in this case node() won't ever return None
404 // in this case node() won't ever return None
394 nt.insert(&*index, index.node(rev).unwrap(), rev)
405 nt.insert(&*index, index.node(rev).unwrap(), rev)
395 .map_err(|e| nodemap_error(py, e))?
406 .map_err(|e| nodemap_error(py, e))?
396 }
407 }
397 Ok(py.None())
408 Ok(py.None())
398 }
409 }
399
410
400 fn get_nodetree<'a>(
411 fn get_nodetree<'a>(
401 &'a self,
412 &'a self,
402 py: Python<'a>,
413 py: Python<'a>,
403 ) -> PyResult<&'a RefCell<Option<NodeTree>>> {
414 ) -> PyResult<&'a RefCell<Option<NodeTree>>> {
404 if self.nt(py).borrow().is_none() {
415 if self.nt(py).borrow().is_none() {
405 let readonly = Box::<Vec<_>>::default();
416 let readonly = Box::<Vec<_>>::default();
406 let mut nt = NodeTree::load_bytes(readonly, 0);
417 let mut nt = NodeTree::load_bytes(readonly, 0);
407 self.fill_nodemap(py, &mut nt)?;
418 self.fill_nodemap(py, &mut nt)?;
408 self.nt(py).borrow_mut().replace(nt);
419 self.nt(py).borrow_mut().replace(nt);
409 }
420 }
410 Ok(self.nt(py))
421 Ok(self.nt(py))
411 }
422 }
412
423
413 /// forward a method call to the underlying C index
424 /// forward a method call to the underlying C index
414 fn call_cindex(
425 fn call_cindex(
415 &self,
426 &self,
416 py: Python,
427 py: Python,
417 name: &str,
428 name: &str,
418 args: &PyTuple,
429 args: &PyTuple,
419 kwargs: Option<&PyDict>,
430 kwargs: Option<&PyDict>,
420 ) -> PyResult<PyObject> {
431 ) -> PyResult<PyObject> {
421 self.cindex(py)
432 self.cindex(py)
422 .borrow()
433 .borrow()
423 .inner()
434 .inner()
424 .call_method(py, name, args, kwargs)
435 .call_method(py, name, args, kwargs)
425 }
436 }
426
437
427 pub fn clone_cindex(&self, py: Python) -> cindex::Index {
438 pub fn clone_cindex(&self, py: Python) -> cindex::Index {
428 self.cindex(py).borrow().clone_ref(py)
439 self.cindex(py).borrow().clone_ref(py)
429 }
440 }
430
441
431 /// Returns the full nodemap bytes to be written as-is to disk
442 /// Returns the full nodemap bytes to be written as-is to disk
432 fn inner_nodemap_data_all(&self, py: Python) -> PyResult<PyBytes> {
443 fn inner_nodemap_data_all(&self, py: Python) -> PyResult<PyBytes> {
433 let nodemap = self.get_nodetree(py)?.borrow_mut().take().unwrap();
444 let nodemap = self.get_nodetree(py)?.borrow_mut().take().unwrap();
434 let (readonly, bytes) = nodemap.into_readonly_and_added_bytes();
445 let (readonly, bytes) = nodemap.into_readonly_and_added_bytes();
435
446
436 // If there's anything readonly, we need to build the data again from
447 // If there's anything readonly, we need to build the data again from
437 // scratch
448 // scratch
438 let bytes = if readonly.len() > 0 {
449 let bytes = if readonly.len() > 0 {
439 let mut nt = NodeTree::load_bytes(Box::<Vec<_>>::default(), 0);
450 let mut nt = NodeTree::load_bytes(Box::<Vec<_>>::default(), 0);
440 self.fill_nodemap(py, &mut nt)?;
451 self.fill_nodemap(py, &mut nt)?;
441
452
442 let (readonly, bytes) = nt.into_readonly_and_added_bytes();
453 let (readonly, bytes) = nt.into_readonly_and_added_bytes();
443 assert_eq!(readonly.len(), 0);
454 assert_eq!(readonly.len(), 0);
444
455
445 bytes
456 bytes
446 } else {
457 } else {
447 bytes
458 bytes
448 };
459 };
449
460
450 let bytes = PyBytes::new(py, &bytes);
461 let bytes = PyBytes::new(py, &bytes);
451 Ok(bytes)
462 Ok(bytes)
452 }
463 }
453
464
454 /// Returns the last saved docket along with the size of any changed data
465 /// Returns the last saved docket along with the size of any changed data
455 /// (in number of blocks), and said data as bytes.
466 /// (in number of blocks), and said data as bytes.
456 fn inner_nodemap_data_incremental(
467 fn inner_nodemap_data_incremental(
457 &self,
468 &self,
458 py: Python,
469 py: Python,
459 ) -> PyResult<PyObject> {
470 ) -> PyResult<PyObject> {
460 let docket = self.docket(py).borrow();
471 let docket = self.docket(py).borrow();
461 let docket = match docket.as_ref() {
472 let docket = match docket.as_ref() {
462 Some(d) => d,
473 Some(d) => d,
463 None => return Ok(py.None()),
474 None => return Ok(py.None()),
464 };
475 };
465
476
466 let node_tree = self.get_nodetree(py)?.borrow_mut().take().unwrap();
477 let node_tree = self.get_nodetree(py)?.borrow_mut().take().unwrap();
467 let masked_blocks = node_tree.masked_readonly_blocks();
478 let masked_blocks = node_tree.masked_readonly_blocks();
468 let (_, data) = node_tree.into_readonly_and_added_bytes();
479 let (_, data) = node_tree.into_readonly_and_added_bytes();
469 let changed = masked_blocks * std::mem::size_of::<Block>();
480 let changed = masked_blocks * std::mem::size_of::<Block>();
470
481
471 Ok((docket, changed, PyBytes::new(py, &data))
482 Ok((docket, changed, PyBytes::new(py, &data))
472 .to_py_object(py)
483 .to_py_object(py)
473 .into_object())
484 .into_object())
474 }
485 }
475
486
476 /// Update the nodemap from the new (mmaped) data.
476 /// Update the nodemap from the new (mmap'ed) data.
487 /// Update the nodemap from the new (mmap'ed) data.
488 /// The docket is kept as a reference for later incremental calls.
478 fn inner_update_nodemap_data(
489 fn inner_update_nodemap_data(
479 &self,
490 &self,
480 py: Python,
491 py: Python,
481 docket: PyObject,
492 docket: PyObject,
482 nm_data: PyObject,
493 nm_data: PyObject,
483 ) -> PyResult<PyObject> {
494 ) -> PyResult<PyObject> {
484 // Safety: we keep the buffer around inside the class as `nodemap_mmap`
495 // Safety: we keep the buffer around inside the class as `nodemap_mmap`
485 let (buf, bytes) = unsafe { mmap_keeparound(py, nm_data)? };
496 let (buf, bytes) = unsafe { mmap_keeparound(py, nm_data)? };
486 let len = buf.item_count();
497 let len = buf.item_count();
487 self.nodemap_mmap(py).borrow_mut().replace(buf);
498 self.nodemap_mmap(py).borrow_mut().replace(buf);
488
499
489 let mut nt = NodeTree::load_bytes(bytes, len);
500 let mut nt = NodeTree::load_bytes(bytes, len);
490
501
491 let data_tip = docket
502 let data_tip = docket
492 .getattr(py, "tip_rev")?
503 .getattr(py, "tip_rev")?
493 .extract::<BaseRevision>(py)?
504 .extract::<BaseRevision>(py)?
494 .into();
505 .into();
495 self.docket(py).borrow_mut().replace(docket.clone_ref(py));
506 self.docket(py).borrow_mut().replace(docket.clone_ref(py));
496 let idx = self.cindex(py).borrow();
507 let idx = self.cindex(py).borrow();
497 let data_tip = idx.check_revision(data_tip).ok_or_else(|| {
508 let data_tip = idx.check_revision(data_tip).ok_or_else(|| {
498 nodemap_error(py, NodeMapError::RevisionNotInIndex(data_tip))
509 nodemap_error(py, NodeMapError::RevisionNotInIndex(data_tip))
499 })?;
510 })?;
500 let current_tip = idx.len();
511 let current_tip = idx.len();
501
512
502 for r in (data_tip.0 + 1)..current_tip as BaseRevision {
513 for r in (data_tip.0 + 1)..current_tip as BaseRevision {
503 let rev = Revision(r);
514 let rev = Revision(r);
504 // in this case node() won't ever return None
515 // in this case node() won't ever return None
505 nt.insert(&*idx, idx.node(rev).unwrap(), rev)
516 nt.insert(&*idx, idx.node(rev).unwrap(), rev)
506 .map_err(|e| nodemap_error(py, e))?
517 .map_err(|e| nodemap_error(py, e))?
507 }
518 }
508
519
509 *self.nt(py).borrow_mut() = Some(nt);
520 *self.nt(py).borrow_mut() = Some(nt);
510
521
511 Ok(py.None())
522 Ok(py.None())
512 }
523 }
513 }
524 }
514
525
515 fn revlog_error(py: Python) -> PyErr {
526 fn revlog_error(py: Python) -> PyErr {
516 match py
527 match py
517 .import("mercurial.error")
528 .import("mercurial.error")
518 .and_then(|m| m.get(py, "RevlogError"))
529 .and_then(|m| m.get(py, "RevlogError"))
519 {
530 {
520 Err(e) => e,
531 Err(e) => e,
521 Ok(cls) => PyErr::from_instance(
532 Ok(cls) => PyErr::from_instance(
522 py,
533 py,
523 cls.call(py, (py.None(),), None).ok().into_py_object(py),
534 cls.call(py, (py.None(),), None).ok().into_py_object(py),
524 ),
535 ),
525 }
536 }
526 }
537 }
527
538
528 fn rev_not_in_index(py: Python, rev: UncheckedRevision) -> PyErr {
539 fn rev_not_in_index(py: Python, rev: UncheckedRevision) -> PyErr {
529 PyErr::new::<ValueError, _>(
540 PyErr::new::<ValueError, _>(
530 py,
541 py,
531 format!(
542 format!(
532 "Inconsistency: Revision {} found in nodemap \
543 "Inconsistency: Revision {} found in nodemap \
533 is not in revlog index",
544 is not in revlog index",
534 rev
545 rev
535 ),
546 ),
536 )
547 )
537 }
548 }
538
549
539 /// Standard treatment of NodeMapError
550 /// Standard treatment of NodeMapError
540 fn nodemap_error(py: Python, err: NodeMapError) -> PyErr {
551 fn nodemap_error(py: Python, err: NodeMapError) -> PyErr {
541 match err {
552 match err {
542 NodeMapError::MultipleResults => revlog_error(py),
553 NodeMapError::MultipleResults => revlog_error(py),
543 NodeMapError::RevisionNotInIndex(r) => rev_not_in_index(py, r),
554 NodeMapError::RevisionNotInIndex(r) => rev_not_in_index(py, r),
544 }
555 }
545 }
556 }
546
557
547 /// Create the module, with __package__ given from parent
558 /// Create the module, with __package__ given from parent
548 pub fn init_module(py: Python, package: &str) -> PyResult<PyModule> {
559 pub fn init_module(py: Python, package: &str) -> PyResult<PyModule> {
549 let dotted_name = &format!("{}.revlog", package);
560 let dotted_name = &format!("{}.revlog", package);
550 let m = PyModule::new(py, dotted_name)?;
561 let m = PyModule::new(py, dotted_name)?;
551 m.add(py, "__package__", package)?;
562 m.add(py, "__package__", package)?;
552 m.add(py, "__doc__", "RevLog - Rust implementations")?;
563 m.add(py, "__doc__", "RevLog - Rust implementations")?;
553
564
554 m.add_class::<MixedIndex>(py)?;
565 m.add_class::<MixedIndex>(py)?;
555
566
556 let sys = PyModule::import(py, "sys")?;
567 let sys = PyModule::import(py, "sys")?;
557 let sys_modules: PyDict = sys.get(py, "modules")?.extract(py)?;
568 let sys_modules: PyDict = sys.get(py, "modules")?.extract(py)?;
558 sys_modules.set_item(py, dotted_name, &m)?;
569 sys_modules.set_item(py, dotted_name, &m)?;
559
570
560 Ok(m)
571 Ok(m)
561 }
572 }
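MixedIndex::new above now threads a default_header u32 down to
hg::index::Index, serializing it big-endian before handing it to
IndexHeader::parse. The revlog index header packs 16 bits of flags above
16 bits of version, so a v1 header with the inline-data and generaldelta
flags set is 0x00030001. A small sketch of that serialization step
(header_bytes is illustrative, not the hg-core API):

    // Big-endian serialization feeding IndexHeader::parse above.
    fn header_bytes(default_header: u32) -> [u8; 4] {
        default_header.to_be_bytes()
    }

    fn main() {
        // Flags (inline-data | generaldelta) in the high 16 bits,
        // revlog version 1 in the low 16 bits.
        assert_eq!(header_bytes(0x0003_0001), [0x00, 0x03, 0x00, 0x01]);
    }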
@@ -1,815 +1,822 b''
1 // status.rs
1 // status.rs
2 //
2 //
3 // Copyright 2020, Georges Racinet <georges.racinets@octobus.net>
3 // Copyright 2020, Georges Racinet <georges.racinets@octobus.net>
4 //
4 //
5 // This software may be used and distributed according to the terms of the
5 // This software may be used and distributed according to the terms of the
6 // GNU General Public License version 2 or any later version.
6 // GNU General Public License version 2 or any later version.
7
7
8 use crate::error::CommandError;
8 use crate::error::CommandError;
9 use crate::ui::{
9 use crate::ui::{
10 format_pattern_file_warning, print_narrow_sparse_warnings, relative_paths,
10 format_pattern_file_warning, print_narrow_sparse_warnings, relative_paths,
11 RelativePaths, Ui,
11 RelativePaths, Ui,
12 };
12 };
13 use crate::utils::path_utils::RelativizePaths;
13 use crate::utils::path_utils::RelativizePaths;
14 use clap::Arg;
14 use clap::Arg;
15 use format_bytes::format_bytes;
15 use format_bytes::format_bytes;
16 use hg::config::Config;
16 use hg::config::Config;
17 use hg::dirstate::has_exec_bit;
17 use hg::dirstate::has_exec_bit;
18 use hg::dirstate::status::StatusPath;
18 use hg::dirstate::status::StatusPath;
19 use hg::dirstate::TruncatedTimestamp;
19 use hg::dirstate::TruncatedTimestamp;
20 use hg::errors::{HgError, IoResultExt};
20 use hg::errors::{HgError, IoResultExt};
21 use hg::filepatterns::parse_pattern_args;
21 use hg::filepatterns::parse_pattern_args;
22 use hg::lock::LockError;
22 use hg::lock::LockError;
23 use hg::manifest::Manifest;
23 use hg::manifest::Manifest;
24 use hg::matchers::{AlwaysMatcher, IntersectionMatcher};
24 use hg::matchers::{AlwaysMatcher, IntersectionMatcher};
25 use hg::repo::Repo;
25 use hg::repo::Repo;
26 use hg::utils::debug::debug_wait_for_file;
26 use hg::utils::debug::debug_wait_for_file;
27 use hg::utils::files::{
27 use hg::utils::files::{
28 get_bytes_from_os_str, get_bytes_from_os_string, get_path_from_bytes,
28 get_bytes_from_os_str, get_bytes_from_os_string, get_path_from_bytes,
29 };
29 };
30 use hg::utils::hg_path::{hg_path_to_path_buf, HgPath};
30 use hg::utils::hg_path::{hg_path_to_path_buf, HgPath};
31 use hg::DirstateStatus;
32 use hg::PatternFileWarning;
31 use hg::PatternFileWarning;
33 use hg::Revision;
32 use hg::Revision;
34 use hg::StatusError;
33 use hg::StatusError;
35 use hg::StatusOptions;
34 use hg::StatusOptions;
36 use hg::{self, narrow, sparse};
35 use hg::{self, narrow, sparse};
36 use hg::{DirstateStatus, RevlogOpenOptions};
37 use log::info;
37 use log::info;
38 use rayon::prelude::*;
38 use rayon::prelude::*;
39 use std::borrow::Cow;
39 use std::borrow::Cow;
40 use std::io;
40 use std::io;
41 use std::mem::take;
41 use std::mem::take;
42 use std::path::PathBuf;
42 use std::path::PathBuf;
43
43
44 pub const HELP_TEXT: &str = "
44 pub const HELP_TEXT: &str = "
45 Show changed files in the working directory
45 Show changed files in the working directory
46
46
47 This is a pure Rust version of `hg status`.
47 This is a pure Rust version of `hg status`.
48
48
49 Some options might be missing; check the list below.
49 Some options might be missing; check the list below.
50 ";
50 ";
51
51
52 pub fn args() -> clap::Command {
52 pub fn args() -> clap::Command {
53 clap::command!("status")
53 clap::command!("status")
54 .alias("st")
54 .alias("st")
55 .about(HELP_TEXT)
        .about(HELP_TEXT)
        .arg(
            Arg::new("file")
                .value_parser(clap::value_parser!(std::ffi::OsString))
                .help("show only these files")
                .action(clap::ArgAction::Append),
        )
        .arg(
            Arg::new("all")
                .help("show status of all files")
                .short('A')
                .action(clap::ArgAction::SetTrue)
                .long("all"),
        )
        .arg(
            Arg::new("modified")
                .help("show only modified files")
                .short('m')
                .action(clap::ArgAction::SetTrue)
                .long("modified"),
        )
        .arg(
            Arg::new("added")
                .help("show only added files")
                .short('a')
                .action(clap::ArgAction::SetTrue)
                .long("added"),
        )
        .arg(
            Arg::new("removed")
                .help("show only removed files")
                .short('r')
                .action(clap::ArgAction::SetTrue)
                .long("removed"),
        )
        .arg(
            Arg::new("clean")
                .help("show only clean files")
                .short('c')
                .action(clap::ArgAction::SetTrue)
                .long("clean"),
        )
        .arg(
            Arg::new("deleted")
                .help("show only deleted files")
                .short('d')
                .action(clap::ArgAction::SetTrue)
                .long("deleted"),
        )
        .arg(
            Arg::new("unknown")
                .help("show only unknown (not tracked) files")
                .short('u')
                .action(clap::ArgAction::SetTrue)
                .long("unknown"),
        )
        .arg(
            Arg::new("ignored")
                .help("show only ignored files")
                .short('i')
                .action(clap::ArgAction::SetTrue)
                .long("ignored"),
        )
        .arg(
            Arg::new("copies")
                .help("show source of copied files (DEFAULT: ui.statuscopies)")
                .short('C')
                .action(clap::ArgAction::SetTrue)
                .long("copies"),
        )
        .arg(
            Arg::new("print0")
                .help("end filenames with NUL, for use with xargs")
                .short('0')
                .action(clap::ArgAction::SetTrue)
                .long("print0"),
        )
        .arg(
            Arg::new("no-status")
                .help("hide status prefix")
                .short('n')
                .action(clap::ArgAction::SetTrue)
                .long("no-status"),
        )
        .arg(
            Arg::new("verbose")
                .help("enable additional output")
                .short('v')
                .action(clap::ArgAction::SetTrue)
                .long("verbose"),
        )
        .arg(
            Arg::new("rev")
                .help("show difference from/to revision")
                .long("rev")
                .num_args(1)
                .action(clap::ArgAction::Append)
                .value_name("REV"),
        )
}
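
These are clap v4 builder calls: each `ArgAction::SetTrue` flag later reads back as a boolean through `ArgMatches::get_flag`. A self-contained sketch of that round trip, reduced to a single flag (not part of the change, just an illustration of the clap API in use here):

// --- illustrative sketch, not part of the diff ---
use clap::{Arg, ArgAction, Command};

fn main() {
    // Minimal stand-in for the builder above, assuming clap v4.
    let cmd = Command::new("status").arg(
        Arg::new("modified")
            .help("show only modified files")
            .short('m')
            .action(ArgAction::SetTrue)
            .long("modified"),
    );
    let matches = cmd.get_matches_from(["status", "-m"]);
    // SetTrue flags read back as plain booleans:
    assert!(matches.get_flag("modified"));
}
// --- end sketch ---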

fn parse_revpair(
    repo: &Repo,
    revs: Option<Vec<String>>,
) -> Result<Option<(Revision, Revision)>, CommandError> {
    let revs = match revs {
        None => return Ok(None),
        Some(revs) => revs,
    };
    if revs.is_empty() {
        return Ok(None);
    }
    if revs.len() != 2 {
        return Err(CommandError::unsupported("expected 0 or 2 --rev flags"));
    }

    let rev1 = &revs[0];
    let rev2 = &revs[1];
    let rev1 = hg::revset::resolve_single(rev1, repo)
        .map_err(|e| (e, rev1.as_str()))?;
    let rev2 = hg::revset::resolve_single(rev2, repo)
        .map_err(|e| (e, rev2.as_str()))?;
    Ok(Some((rev1, rev2)))
}
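
The helper above accepts exactly zero or two `--rev` values. A tiny self-contained sketch of just the arity rule (the real `Revision` resolution needs a live repo, so plain strings stand in here; all names are illustrative):

// --- illustrative sketch, not part of the diff ---
fn revpair(revs: Option<Vec<String>>) -> Result<Option<(String, String)>, String> {
    let revs = match revs {
        None => return Ok(None),
        Some(revs) => revs,
    };
    if revs.is_empty() {
        return Ok(None);
    }
    if revs.len() != 2 {
        return Err("expected 0 or 2 --rev flags".to_string());
    }
    Ok(Some((revs[0].clone(), revs[1].clone())))
}

fn main() {
    assert_eq!(revpair(None), Ok(None)); // plain `hg status`
    assert!(revpair(Some(vec![".".into()])).is_err()); // one --rev: rejected
    assert!(revpair(Some(vec![".".into(), "tip".into()])).unwrap().is_some());
}
// --- end sketch ---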

/// Pure data type allowing the caller to specify file states to display
#[derive(Copy, Clone, Debug)]
pub struct DisplayStates {
    pub modified: bool,
    pub added: bool,
    pub removed: bool,
    pub clean: bool,
    pub deleted: bool,
    pub unknown: bool,
    pub ignored: bool,
}

pub const DEFAULT_DISPLAY_STATES: DisplayStates = DisplayStates {
    modified: true,
    added: true,
    removed: true,
    clean: false,
    deleted: true,
    unknown: true,
    ignored: false,
};

pub const ALL_DISPLAY_STATES: DisplayStates = DisplayStates {
    modified: true,
    added: true,
    removed: true,
    clean: true,
    deleted: true,
    unknown: true,
    ignored: true,
};

impl DisplayStates {
    pub fn is_empty(&self) -> bool {
        !(self.modified
            || self.added
            || self.removed
            || self.clean
            || self.deleted
            || self.unknown
            || self.ignored)
    }
}
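
As a usage note: the selection is purely additive, and `is_empty` is what lets `run` (below) fall back to the defaults when no state flag was passed. A quick sketch of that fallback, which compiles when pasted after the definitions above:

// --- illustrative sketch, not part of the diff ---
fn main() {
    // `hg status` with no state flags requests nothing explicitly...
    let requested = DisplayStates {
        modified: false,
        added: false,
        removed: false,
        clean: false,
        deleted: false,
        unknown: false,
        ignored: false,
    };
    // ...so `run` substitutes the defaults: show M, A, R, !, ? but not C, I.
    let effective =
        if requested.is_empty() { DEFAULT_DISPLAY_STATES } else { requested };
    assert!(effective.modified && !effective.clean && !effective.ignored);
}
// --- end sketch ---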

fn has_unfinished_merge(repo: &Repo) -> Result<bool, CommandError> {
    Ok(repo.dirstate_parents()?.is_merge())
}

fn has_unfinished_state(repo: &Repo) -> Result<bool, CommandError> {
    // These are all the known values for the [fname] argument of
    // [addunfinished] function in [state.py]
    let known_state_files: &[&str] = &[
        "bisect.state",
        "graftstate",
        "histedit-state",
        "rebasestate",
        "shelvedstate",
        "transplant/journal",
        "updatestate",
    ];
    if has_unfinished_merge(repo)? {
        return Ok(true);
    };
    for f in known_state_files {
        if repo.hg_vfs().join(f).exists() {
            return Ok(true);
        }
    }
    Ok(false)
}

pub fn run(invocation: &crate::CliInvocation) -> Result<(), CommandError> {
    // TODO: lift these limitations
    if invocation
        .config
        .get(b"commands", b"status.terse")
        .is_some()
    {
        return Err(CommandError::unsupported(
            "status.terse is not yet supported with rhg status",
        ));
    }

    let ui = invocation.ui;
    let config = invocation.config;
    let args = invocation.subcommand_args;

    let revs = args.get_many::<String>("rev");
    let print0 = args.get_flag("print0");
    let verbose = args.get_flag("verbose")
        || config.get_bool(b"ui", b"verbose")?
        || config.get_bool(b"commands", b"status.verbose")?;
    let verbose = verbose && !print0;

    let all = args.get_flag("all");
    let display_states = if all {
        // TODO when implementing `--quiet`: it excludes clean files
        // from `--all`
        ALL_DISPLAY_STATES
    } else {
        let requested = DisplayStates {
            modified: args.get_flag("modified"),
            added: args.get_flag("added"),
            removed: args.get_flag("removed"),
            clean: args.get_flag("clean"),
            deleted: args.get_flag("deleted"),
            unknown: args.get_flag("unknown"),
            ignored: args.get_flag("ignored"),
        };
        if requested.is_empty() {
            DEFAULT_DISPLAY_STATES
        } else {
            requested
        }
    };
    let no_status = args.get_flag("no-status");
    let list_copies = all
        || args.get_flag("copies")
        || config.get_bool(b"ui", b"statuscopies")?;

    let repo = invocation.repo?;
    let revpair = parse_revpair(repo, revs.map(|i| i.cloned().collect()))?;

    if verbose && has_unfinished_state(repo)? {
        return Err(CommandError::unsupported(
            "verbose status output is not supported by rhg (and is needed because we're in an unfinished operation)",
        ));
    }

    let mut dmap = repo.dirstate_map_mut()?;

    let check_exec = hg::checkexec::check_exec(repo.working_directory_path());

    let options = StatusOptions {
        check_exec,
        list_clean: display_states.clean,
        list_unknown: display_states.unknown,
        list_ignored: display_states.ignored,
        list_copies,
        collect_traversed_dirs: false,
    };

    type StatusResult<'a> =
        Result<(DirstateStatus<'a>, Vec<PatternFileWarning>), StatusError>;

    let relative_status = config
        .get_option(b"commands", b"status.relative")?
        .expect("commands.status.relative should have a default value");

    let relativize_paths = relative_status || {
        // See in Python code with `getuipathfn` usage in `commands.py`.
        let legacy_relative_behavior = args.contains_id("file");
        match relative_paths(invocation.config)? {
            RelativePaths::Legacy => legacy_relative_behavior,
            RelativePaths::Bool(v) => v,
        }
    };

    let mut output = DisplayStatusPaths {
        ui,
        no_status,
        relativize: if relativize_paths {
            Some(RelativizePaths::new(repo)?)
        } else {
            None
        },
        print0,
    };

    let after_status = |res: StatusResult| -> Result<_, CommandError> {
        let (mut ds_status, pattern_warnings) = res?;
        for warning in pattern_warnings {
            ui.write_stderr(&format_pattern_file_warning(&warning, repo))?;
        }

        for (path, error) in take(&mut ds_status.bad) {
            let error = match error {
                hg::BadMatch::OsError(code) => {
                    std::io::Error::from_raw_os_error(code).to_string()
                }
                hg::BadMatch::BadType(ty) => {
                    format!("unsupported file type (type is {})", ty)
                }
            };
            ui.write_stderr(&format_bytes!(
                b"{}: {}\n",
                path.as_bytes(),
                error.as_bytes()
            ))?
        }
        if !ds_status.unsure.is_empty() {
            info!(
                "Files to be rechecked by retrieval from filelog: {:?}",
                ds_status.unsure.iter().map(|s| &s.path).collect::<Vec<_>>()
            );
        }
        let mut fixup = Vec::new();
        if !ds_status.unsure.is_empty()
            && (display_states.modified || display_states.clean)
        {
            let p1 = repo.dirstate_parents()?.p1;
            let manifest = repo.manifest_for_node(p1).map_err(|e| {
                CommandError::from((e, &*format!("{:x}", p1.short())))
            })?;
            let working_directory_vfs = repo.working_directory_vfs();
            let store_vfs = repo.store_vfs();
+            let revlog_open_options = repo.default_revlog_options(false)?;
            let res: Vec<_> = take(&mut ds_status.unsure)
                .into_par_iter()
                .map(|to_check| {
                    // The compiler seems to get a bit confused with complex
                    // inference when using a parallel iterator + map
                    // + map_err + collect, so let's just inline some of the
                    // logic.
                    match unsure_is_modified(
                        working_directory_vfs,
                        store_vfs,
                        check_exec,
                        &manifest,
                        &to_check.path,
+                        revlog_open_options,
                    ) {
                        Err(HgError::IoError { .. }) => {
                            // IO errors most likely stem from the file being
                            // deleted even though we know it's in the
                            // dirstate.
                            Ok((to_check, UnsureOutcome::Deleted))
                        }
                        Ok(outcome) => Ok((to_check, outcome)),
                        Err(e) => Err(e),
                    }
                })
                .collect::<Result<_, _>>()?;
            for (status_path, outcome) in res.into_iter() {
                match outcome {
                    UnsureOutcome::Clean => {
                        if display_states.clean {
                            ds_status.clean.push(status_path.clone());
                        }
                        fixup.push(status_path.path.into_owned())
                    }
                    UnsureOutcome::Modified => {
                        if display_states.modified {
                            ds_status.modified.push(status_path);
                        }
                    }
                    UnsureOutcome::Deleted => {
                        if display_states.deleted {
                            ds_status.deleted.push(status_path);
                        }
                    }
                }
            }
        }

        let dirstate_write_needed = ds_status.dirty;
        let filesystem_time_at_status_start =
            ds_status.filesystem_time_at_status_start;

        output.output(display_states, ds_status)?;

        Ok((
            fixup,
            dirstate_write_needed,
            filesystem_time_at_status_start,
        ))
    };
    let (narrow_matcher, narrow_warnings) = narrow::matcher(repo)?;

    match revpair {
        Some((rev1, rev2)) => {
            let mut ds_status = DirstateStatus::default();
            if list_copies {
                return Err(CommandError::unsupported(
                    "status --rev --rev with copy information is not implemented yet",
                ));
            }

            let stat = hg::operations::status_rev_rev_no_copies(
                repo,
                rev1,
                rev2,
                narrow_matcher,
            )?;
            for entry in stat.iter() {
                let (path, status) = entry?;
                let path = StatusPath {
                    path: Cow::Borrowed(path),
                    copy_source: None,
                };
                match status {
                    hg::operations::DiffStatus::Removed => {
                        if display_states.removed {
                            ds_status.removed.push(path)
                        }
                    }
                    hg::operations::DiffStatus::Added => {
                        if display_states.added {
                            ds_status.added.push(path)
                        }
                    }
                    hg::operations::DiffStatus::Modified => {
                        if display_states.modified {
                            ds_status.modified.push(path)
                        }
                    }
                    hg::operations::DiffStatus::Matching => {
                        if display_states.clean {
                            ds_status.clean.push(path)
                        }
                    }
                }
            }
            output.output(display_states, ds_status)?;
            return Ok(());
        }
        None => (),
    }

    let (sparse_matcher, sparse_warnings) = sparse::matcher(repo)?;
    let matcher = match (repo.has_narrow(), repo.has_sparse()) {
        (true, true) => {
            Box::new(IntersectionMatcher::new(narrow_matcher, sparse_matcher))
        }
        (true, false) => narrow_matcher,
        (false, true) => sparse_matcher,
        (false, false) => Box::new(AlwaysMatcher),
    };
    let matcher = match args.get_many::<std::ffi::OsString>("file") {
        None => matcher,
        Some(files) => {
            let patterns: Vec<Vec<u8>> = files
                .filter(|s| !s.is_empty())
                .map(get_bytes_from_os_str)
                .collect();
            for file in &patterns {
                if file.starts_with(b"set:") {
                    return Err(CommandError::unsupported("fileset"));
                }
            }
            let cwd = hg::utils::current_dir()?;
            let root = repo.working_directory_path();
            let ignore_patterns = parse_pattern_args(patterns, &cwd, root)?;
            let files_matcher =
                hg::matchers::PatternMatcher::new(ignore_patterns)?;
            Box::new(IntersectionMatcher::new(
                Box::new(files_matcher),
                matcher,
            ))
        }
    };

    print_narrow_sparse_warnings(
        &narrow_warnings,
        &sparse_warnings,
        ui,
        repo,
    )?;
    let (fixup, mut dirstate_write_needed, filesystem_time_at_status_start) =
        dmap.with_status(
            matcher.as_ref(),
            repo.working_directory_path().to_owned(),
            ignore_files(repo, config),
            options,
            after_status,
        )?;

    // Development config option to test write races
    if let Err(e) =
        debug_wait_for_file(config, "status.pre-dirstate-write-file")
    {
        ui.write_stderr(e.as_bytes()).ok();
    }

    if (fixup.is_empty() || filesystem_time_at_status_start.is_none())
        && !dirstate_write_needed
    {
        // Nothing to update
        return Ok(());
    }

    // Update the dirstate on disk if we can
    let with_lock_result =
        repo.try_with_wlock_no_wait(|| -> Result<(), CommandError> {
            if let Some(mtime_boundary) = filesystem_time_at_status_start {
                for hg_path in fixup {
                    use std::os::unix::fs::MetadataExt;
                    let fs_path = hg_path_to_path_buf(&hg_path)
                        .expect("HgPath conversion");
                    // Specifically do not reuse `fs_metadata` from
                    // `unsure_is_clean` which was needed before reading
                    // contents. Here we access metadata again after reading
                    // content, in case it changed in the meantime.
                    let metadata_res = repo
                        .working_directory_vfs()
                        .symlink_metadata(&fs_path);
                    let fs_metadata = match metadata_res {
                        Ok(meta) => meta,
                        Err(err) => match err {
                            HgError::IoError { .. } => {
                                // The file has probably been deleted. In any
                                // case, it was in the dirstate before, so
                                // let's ignore the error.
                                continue;
                            }
                            _ => return Err(err.into()),
                        },
                    };
                    if let Some(mtime) =
                        TruncatedTimestamp::for_reliable_mtime_of(
                            &fs_metadata,
                            &mtime_boundary,
                        )
                        .when_reading_file(&fs_path)?
                    {
                        let mode = fs_metadata.mode();
                        let size = fs_metadata.len();
                        dmap.set_clean(&hg_path, mode, size as u32, mtime)?;
                        dirstate_write_needed = true
                    }
                }
            }
            drop(dmap); // Avoid "already mutably borrowed" RefCell panics
            if dirstate_write_needed {
                repo.write_dirstate()?
            }
            Ok(())
        });
    match with_lock_result {
        Ok(closure_result) => closure_result?,
        Err(LockError::AlreadyHeld) => {
            // Not updating the dirstate is not ideal but not critical:
            // don’t keep our caller waiting until some other Mercurial
            // process releases the lock.
            log::info!("not writing dirstate from `status`: lock is held")
        }
        Err(LockError::Other(HgError::IoError { error, .. }))
            if error.kind() == io::ErrorKind::PermissionDenied =>
        {
            // `hg status` on a read-only repository is fine
        }
        Err(LockError::Other(error)) => {
            // Report other I/O errors
            Err(error)?
        }
    }
    Ok(())
}

fn ignore_files(repo: &Repo, config: &Config) -> Vec<PathBuf> {
    let mut ignore_files = Vec::new();
    let repo_ignore = repo.working_directory_vfs().join(".hgignore");
    if repo_ignore.exists() {
        ignore_files.push(repo_ignore)
    }
    for (key, value) in config.iter_section(b"ui") {
        if key == b"ignore" || key.starts_with(b"ignore.") {
            let path = get_path_from_bytes(value);
            // TODO: expand "~/" and environment variable here, like Python
            // does with `os.path.expanduser` and `os.path.expandvars`

            let joined = repo.working_directory_path().join(path);
            ignore_files.push(joined);
        }
    }
    ignore_files
}
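
One detail worth calling out in `ignore_files`: both the plain `ui.ignore` key and any `ui.ignore.<name>` sub-key contribute an extra ignore file, which is how users configure several global ignore files at once. A self-contained sketch of just that key filter (the helper name is invented for illustration):

// --- illustrative sketch, not part of the diff ---
// Mirrors the key filter used in `ignore_files` above.
fn is_extra_ignore_key(key: &[u8]) -> bool {
    key == b"ignore" || key.starts_with(b"ignore.")
}

fn main() {
    assert!(is_extra_ignore_key(b"ignore")); // ui.ignore
    assert!(is_extra_ignore_key(b"ignore.global")); // ui.ignore.global
    assert!(!is_extra_ignore_key(b"ignorecase")); // unrelated ui key
}
// --- end sketch ---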

struct DisplayStatusPaths<'a> {
    ui: &'a Ui,
    no_status: bool,
    relativize: Option<RelativizePaths>,
    print0: bool,
}

impl DisplayStatusPaths<'_> {
    // Probably more elegant to use a Deref or Borrow trait rather than
    // hardcode HgPathBuf, but probably not really useful at this point
    fn display(
        &self,
        status_prefix: &[u8],
        label: &'static str,
        mut paths: Vec<StatusPath<'_>>,
    ) -> Result<(), CommandError> {
        paths.sort_unstable();
        // TODO: get the stdout lock once for the whole loop
        // instead of in each write
        for StatusPath { path, copy_source } in paths {
            let relative_path;
            let relative_source;
            let (path, copy_source) = if let Some(relativize) =
                &self.relativize
            {
                relative_path = relativize.relativize(&path);
                relative_source =
                    copy_source.as_ref().map(|s| relativize.relativize(s));
                (&*relative_path, relative_source.as_deref())
            } else {
                (path.as_bytes(), copy_source.as_ref().map(|s| s.as_bytes()))
            };
            // TODO: Add a way to use `write_bytes!` instead of `format_bytes!`
            // in order to stream to stdout instead of allocating an
            // intermediate `Vec<u8>`.
            if !self.no_status {
                self.ui.write_stdout_labelled(status_prefix, label)?
            }
            let linebreak = if self.print0 { b"\x00" } else { b"\n" };
            self.ui.write_stdout_labelled(
                &format_bytes!(b"{}{}", path, linebreak),
                label,
            )?;
            if let Some(source) = copy_source.filter(|_| !self.no_status) {
                let label = "status.copied";
                self.ui.write_stdout_labelled(
                    &format_bytes!(b"  {}{}", source, linebreak),
                    label,
                )?
            }
        }
        Ok(())
    }

    fn output(
        &mut self,
        display_states: DisplayStates,
        ds_status: DirstateStatus,
    ) -> Result<(), CommandError> {
        if display_states.modified {
            self.display(b"M ", "status.modified", ds_status.modified)?;
        }
        if display_states.added {
            self.display(b"A ", "status.added", ds_status.added)?;
        }
        if display_states.removed {
            self.display(b"R ", "status.removed", ds_status.removed)?;
        }
        if display_states.deleted {
            self.display(b"! ", "status.deleted", ds_status.deleted)?;
        }
        if display_states.unknown {
            self.display(b"? ", "status.unknown", ds_status.unknown)?;
        }
        if display_states.ignored {
            self.display(b"I ", "status.ignored", ds_status.ignored)?;
        }
        if display_states.clean {
            self.display(b"C ", "status.clean", ds_status.clean)?;
        }
        Ok(())
    }
}

/// Outcome of the additional check for an ambiguous tracked file
enum UnsureOutcome {
    /// The file is actually clean
    Clean,
    /// The file has been modified
    Modified,
    /// The file was deleted on disk (or became another type of fs entry)
    Deleted,
}

/// Check if a file is modified by comparing actual repo store and file system.
///
/// This is meant to be used for those files that the dirstate cannot resolve,
/// due to time resolution limits.
fn unsure_is_modified(
    working_directory_vfs: hg::vfs::Vfs,
    store_vfs: hg::vfs::Vfs,
    check_exec: bool,
    manifest: &Manifest,
    hg_path: &HgPath,
+    revlog_open_options: RevlogOpenOptions,
) -> Result<UnsureOutcome, HgError> {
    let vfs = working_directory_vfs;
    let fs_path = hg_path_to_path_buf(hg_path).expect("HgPath conversion");
    let fs_metadata = vfs.symlink_metadata(&fs_path)?;
    let is_symlink = fs_metadata.file_type().is_symlink();

    let entry = manifest
        .find_by_path(hg_path)?
        .expect("ambiguous file not in p1");

    // TODO: Also account for `FALLBACK_SYMLINK` and `FALLBACK_EXEC` from the
    // dirstate
    let fs_flags = if is_symlink {
        Some(b'l')
    } else if check_exec && has_exec_bit(&fs_metadata) {
        Some(b'x')
    } else {
        None
    };

    let entry_flags = if check_exec {
        entry.flags
    } else if entry.flags == Some(b'x') {
        None
    } else {
        entry.flags
    };

    if entry_flags != fs_flags {
        return Ok(UnsureOutcome::Modified);
    }
-    let filelog = hg::filelog::Filelog::open_vfs(&store_vfs, hg_path)?;
+    let filelog = hg::filelog::Filelog::open_vfs(
+        &store_vfs,
+        hg_path,
+        revlog_open_options,
+    )?;
    let fs_len = fs_metadata.len();
    let file_node = entry.node_id()?;
    let filelog_entry = filelog.entry_for_node(file_node).map_err(|_| {
        HgError::corrupted(format!(
            "filelog {:?} missing node {:?} from manifest",
            hg_path, file_node
        ))
    })?;
    if filelog_entry.file_data_len_not_equal_to(fs_len) {
        // No need to read file contents:
        // it cannot be equal if it has a different length.
        return Ok(UnsureOutcome::Modified);
    }

    let p1_filelog_data = filelog_entry.data()?;
    let p1_contents = p1_filelog_data.file_data()?;
    if p1_contents.len() as u64 != fs_len {
        // No need to read file contents:
        // it cannot be equal if it has a different length.
        return Ok(UnsureOutcome::Modified);
    }

    let fs_contents = if is_symlink {
        get_bytes_from_os_string(vfs.read_link(fs_path)?.into_os_string())
    } else {
        vfs.read(fs_path)?
    };

    Ok(if p1_contents != &*fs_contents {
        UnsureOutcome::Modified
    } else {
        UnsureOutcome::Clean
    })
}
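
Stepping back, the Rust change is plumbing rather than behavior: `run` now computes the revlog open options once from the repository (`repo.default_revlog_options(false)`) and hands them down through `unsure_is_modified` into every `Filelog::open_vfs` call, instead of each filelog rediscovering them. A self-contained toy of that pattern (all names here are hypothetical, not the real hg-core API):

// --- illustrative sketch, not part of the diff ---
// Toy illustration of "compute open options once, reuse per file".
#[derive(Clone, Copy)]
struct OpenOptions {
    use_nodemap: bool, // stand-in for whatever the real options carry
}

struct Store;

impl Store {
    // In the real code this would be derived from repo requirements/config.
    fn default_options(&self) -> OpenOptions {
        OpenOptions { use_nodemap: true }
    }

    fn open_filelog(&self, name: &str, opts: OpenOptions) -> String {
        format!("opened {name} (use_nodemap={})", opts.use_nodemap)
    }
}

fn main() {
    let store = Store;
    let opts = store.default_options(); // once, before the per-file loop
    for name in ["a.txt", "b.txt"] {
        println!("{}", store.open_filelog(name, opts)); // reused per file
    }
}
// --- end sketch ---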
@@ -1,60 +1,63 b''
+import struct
import unittest

try:
    from mercurial import rustext

    rustext.__name__  # trigger immediate actual import
except ImportError:
    rustext = None
else:
    from mercurial.rustext import revlog

    # this would fail already without appropriate ancestor.__package__
    from mercurial.rustext.ancestor import LazyAncestors

from mercurial.testing import revlog as revlogtesting

+header = struct.unpack(">I", revlogtesting.data_non_inlined[:4])[0]
+

@unittest.skipIf(
    rustext is None,
    "rustext module revlog relies on is not available",
)
class RustRevlogIndexTest(revlogtesting.RevlogBasedTestBase):
    def test_heads(self):
        idx = self.parseindex()
-        rustidx = revlog.MixedIndex(idx, revlogtesting.data_non_inlined)
+        rustidx = revlog.MixedIndex(idx, revlogtesting.data_non_inlined, header)
        self.assertEqual(rustidx.headrevs(), idx.headrevs())

    def test_get_cindex(self):
        # drop me once we no longer need the method for shortest node
        idx = self.parseindex()
-        rustidx = revlog.MixedIndex(idx, revlogtesting.data_non_inlined)
+        rustidx = revlog.MixedIndex(idx, revlogtesting.data_non_inlined, header)
        cidx = rustidx.get_cindex()
        self.assertTrue(idx is cidx)

    def test_len(self):
        idx = self.parseindex()
-        rustidx = revlog.MixedIndex(idx, revlogtesting.data_non_inlined)
+        rustidx = revlog.MixedIndex(idx, revlogtesting.data_non_inlined, header)
        self.assertEqual(len(rustidx), len(idx))

    def test_ancestors(self):
        idx = self.parseindex()
-        rustidx = revlog.MixedIndex(idx, revlogtesting.data_non_inlined)
+        rustidx = revlog.MixedIndex(idx, revlogtesting.data_non_inlined, header)
        lazy = LazyAncestors(rustidx, [3], 0, True)
        # we have two more references to the index:
        # - in its inner iterator for __contains__ and __bool__
        # - in the LazyAncestors instance itself (to spawn new iterators)
        self.assertTrue(2 in lazy)
        self.assertTrue(bool(lazy))
        self.assertEqual(list(lazy), [3, 2, 1, 0])
        # a second time to validate that we spawn new iterators
        self.assertEqual(list(lazy), [3, 2, 1, 0])

        # let's check bool for an empty one
        self.assertFalse(LazyAncestors(idx, [0], 0, False))


if __name__ == '__main__':
    import silenttestrunner

    silenttestrunner.main(__name__)
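
The test change reflects the same theme from the Python side: the Rust index now needs the revlog header word up front. The first four bytes of a revlog index hold a big-endian 32-bit integer whose high 16 bits are flags (for example, 0x0001 marks inline data) and whose low 16 bits are the format version; that is exactly what `struct.unpack(">I", ...)` extracts. A rough Rust equivalent of that read (the function name is made up for illustration):

// --- illustrative sketch, not part of the diff ---
// Reads the revlog header word, like Python's struct.unpack(">I", data[:4])[0].
fn header_word(index_bytes: &[u8]) -> Option<u32> {
    let first4: [u8; 4] = index_bytes.get(..4)?.try_into().ok()?;
    Some(u32::from_be_bytes(first4))
}

fn main() {
    // Bytes 00 01 00 01: flags 0x0001 (inline data), version 1.
    assert_eq!(header_word(&[0, 1, 0, 1]), Some(0x0001_0001));
}
// --- end sketch ---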