revlog: deprecate the compatibility config property...
marmoute
r51961:41c73325 default
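Each legacy attribute on `revlog` becomes a read-only property that calls `util.nouideprecwarn` with a pointer to the attribute's new home on the `feature_config` / `data_config` / `delta_config` objects, then forwards the read. Below is a minimal, self-contained sketch of that proxy pattern; it uses the standard `warnings` module as a stand-in for Mercurial's `util.nouideprecwarn`, and the class names are illustrative, not the real revlog API.

import warnings


class DeltaConfig:
    """Illustrative new home for delta-related tuning knobs."""

    def __init__(self, general_delta=False):
        self.general_delta = general_delta


class Revlog:
    """Sketch of the compatibility proxy this change installs."""

    def __init__(self):
        self.delta_config = DeltaConfig(general_delta=True)

    @property
    def _generaldelta(self):
        """temporary compatibility proxy"""
        # The real code calls util.nouideprecwarn(msg, b"6.6",
        # stacklevel=2); DeprecationWarning is a stand-in here.
        warnings.warn(
            "use revlog.delta_config.general_delta",
            DeprecationWarning,
            stacklevel=2,
        )
        return self.delta_config.general_delta


rl = Revlog()
# The old spelling still works, but now warns at the call site:
assert rl._generaldelta is True
# New code should read the config object directly, warning-free:
assert rl.delta_config.general_delta is True

Old spellings keep working through the deprecation window, so extensions get time to migrate to the config objects before the proxies are removed.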
@@ -1,3724 +1,3797 b''
# revlog.py - storage back-end for mercurial
# coding: utf8
#
# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""


import binascii
import collections
import contextlib
import io
import os
import struct
import weakref
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullrev,
    sha1nodeconstants,
    short,
    wdirrev,
)
from .i18n import _
from .revlogutils.constants import (
    ALL_KINDS,
    CHANGELOGV2,
    COMP_MODE_DEFAULT,
    COMP_MODE_INLINE,
    COMP_MODE_PLAIN,
    DELTA_BASE_REUSE_NO,
    DELTA_BASE_REUSE_TRY,
    ENTRY_RANK,
    FEATURES_BY_VERSION,
    FLAG_GENERALDELTA,
    FLAG_INLINE_DATA,
    INDEX_HEADER,
    KIND_CHANGELOG,
    KIND_FILELOG,
    RANK_UNKNOWN,
    REVLOGV0,
    REVLOGV1,
    REVLOGV1_FLAGS,
    REVLOGV2,
    REVLOGV2_FLAGS,
    REVLOG_DEFAULT_FLAGS,
    REVLOG_DEFAULT_FORMAT,
    REVLOG_DEFAULT_VERSION,
    SUPPORTED_FLAGS,
)
from .revlogutils.flagutil import (
    REVIDX_DEFAULT_FLAGS,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_FLAGS_ORDER,
    REVIDX_HASCOPIESINFO,
    REVIDX_ISCENSORED,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
)
from .thirdparty import attr
from . import (
    ancestor,
    dagop,
    error,
    mdiff,
    policy,
    pycompat,
    revlogutils,
    templatefilters,
    util,
)
from .interfaces import (
    repository,
    util as interfaceutil,
)
from .revlogutils import (
    deltas as deltautil,
    docket as docketutil,
    flagutil,
    nodemap as nodemaputil,
    randomaccessfile,
    revlogv0,
    rewrite,
    sidedata as sidedatautil,
)
from .utils import (
    storageutil,
    stringutil,
)

# blanked usage of all the names to prevent pyflakes constraints
# We need these names available in the module for extensions.

REVLOGV0
REVLOGV1
REVLOGV2
CHANGELOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_HASCOPIESINFO
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_RAWTEXT_CHANGING_FLAGS

parsers = policy.importmod('parsers')
rustancestor = policy.importrust('ancestor')
rustdagop = policy.importrust('dagop')
rustrevlog = policy.importrust('revlog')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of inline data embedded into a revlog
_maxinline = 131072

# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    return text, False


def ellipsiswriteprocessor(rl, text):
    return text, False


def ellipsisrawprocessor(rl, text):
    return False


ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)


def _verify_revision(rl, skipflags, state, node):
    """Verify the integrity of the given revlog ``node`` while providing a hook
    point for extensions to influence the operation."""
    if skipflags:
        state[b'skipread'].add(node)
    else:
        # Side-effect: read content and verify hash.
        rl.revision(node)


# True if a fast implementation for persistent-nodemap is available
#
# We also consider we have a "fast" implementation in "pure" python because
# people using pure don't really have performance considerations (and a
# wheelbarrow of other slowness sources)
HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or hasattr(
    parsers, 'BaseIndexObject'
)


@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta:
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    sidedata = attr.ib()
    protocol_flags = attr.ib()
    linknode = attr.ib(default=None)


@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem:
    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)


def parse_index_v1(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline)
    return index, cache


def parse_index_v2(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline, format=REVLOGV2)
    return index, cache


def parse_index_cl_v2(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline, format=CHANGELOGV2)
    return index, cache


if hasattr(parsers, 'parse_index_devel_nodemap'):

    def parse_index_v1_nodemap(data, inline):
        index, cache = parsers.parse_index_devel_nodemap(data, inline)
        return index, cache


else:
    parse_index_v1_nodemap = None


def parse_index_v1_mixed(data, inline):
    index, cache = parse_index_v1(data, inline)
    return rustrevlog.MixedIndex(index), cache


# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7FFFFFFF

FILE_TOO_SHORT_MSG = _(
    b'cannot read from revlog %s;'
    b' expected %d bytes from offset %d, data size is %d'
)

hexdigits = b'0123456789abcdefABCDEF'


class _Config:
    def copy(self):
        return self.__class__(**self.__dict__)


@attr.s()
class FeatureConfig(_Config):
    """Hold configuration values about the available revlog features"""

    # the default compression engine
    compression_engine = attr.ib(default=b'zlib')
    # compression engines options
    compression_engine_options = attr.ib(default=attr.Factory(dict))

    # can we use censor on this revlog
    censorable = attr.ib(default=False)
    # does this revlog use the "side data" feature
    has_side_data = attr.ib(default=False)
    # might remove rank configuration once the computation has no impact
    compute_rank = attr.ib(default=False)
    # parent order is supposed to be semantically irrelevant, so we
    # normally resort parents to ensure that the first parent is non-null,
    # if there is a non-null parent at all.
    # filelog abuses the parent order as a flag to mark some instances of
    # meta-encoded files, so allow it to disable this behavior.
    canonical_parent_order = attr.ib(default=False)
    # can ellipsis commit be used
    enable_ellipsis = attr.ib(default=False)

    def copy(self):
        new = super().copy()
        new.compression_engine_options = self.compression_engine_options.copy()
        return new


@attr.s()
class DataConfig(_Config):
    """Hold configuration values about how the revlog data is read"""

    # should we try to open the "pending" version of the revlog
    try_pending = attr.ib(default=False)
    # should we try to open the "split" version of the revlog
    try_split = attr.ib(default=False)
    # When True, indexfile should be opened with checkambig=True at writing,
    # to avoid file stat ambiguity.
    check_ambig = attr.ib(default=False)

    # If true, use mmap instead of reading to deal with large index
    mmap_large_index = attr.ib(default=False)
    # how much data is large
    mmap_index_threshold = attr.ib(default=None)
    # How much data to read and cache into the raw revlog data cache.
    chunk_cache_size = attr.ib(default=65536)

    # Allow sparse reading of the revlog data
    with_sparse_read = attr.ib(default=False)
    # minimal density of a sparse read chunk
    sr_density_threshold = attr.ib(default=0.50)
    # minimal size of data we skip when performing sparse read
    sr_min_gap_size = attr.ib(default=262144)

    # are deltas encoded against arbitrary bases.
    generaldelta = attr.ib(default=False)


@attr.s()
class DeltaConfig(_Config):
    """Hold configuration values about how new deltas are computed

    Some attributes are duplicated from DataConfig to help keep each object
    self-contained.
    """

    # can deltas be encoded against arbitrary bases.
    general_delta = attr.ib(default=False)
    # Allow sparse writing of the revlog data
    sparse_revlog = attr.ib(default=False)
    # maximum length of a delta chain
    max_chain_len = attr.ib(default=None)
    # Maximum distance between delta chain base start and end
    max_deltachain_span = attr.ib(default=-1)
    # If `upper_bound_comp` is not None, this is the expected maximal gain from
    # compression for the data content.
    upper_bound_comp = attr.ib(default=None)
    # Should we try a delta against both parents
    delta_both_parents = attr.ib(default=True)
    # Test delta base candidate groups by chunks of this maximal size.
    candidate_group_chunk_size = attr.ib(default=0)
    # Should we display debug information about delta computation
    debug_delta = attr.ib(default=False)
    # trust incoming deltas by default
    lazy_delta = attr.ib(default=True)
    # trust the base of incoming deltas by default
    lazy_delta_base = attr.ib(default=False)


class revlog:
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.

    If `upperboundcomp` is not None, this is the expected maximal gain from
    compression for the data content.

    `concurrencychecker` is an optional function that receives 3 arguments: a
    file handle, a filename, and an expected position. It should check whether
    the current position in the file handle is valid, and log/warn/fail (by
    raising).

    See mercurial/revlogutils/constants.py for details about the content of an
    index entry.
    """

    _flagserrorclass = error.RevlogError

    @staticmethod
    def is_inline_index(header_bytes):
        """Determine if a revlog is inline from the initial bytes of the index"""
        header = INDEX_HEADER.unpack(header_bytes)[0]

        _format_flags = header & ~0xFFFF
        _format_version = header & 0xFFFF

        features = FEATURES_BY_VERSION[_format_version]
        return features[b'inline'](_format_flags)

    def __init__(
        self,
        opener,
        target,
        radix,
        postfix=None,  # only exists for `tmpcensored` now
        checkambig=False,
        mmaplargeindex=False,
        censorable=False,
        upperboundcomp=None,
        persistentnodemap=False,
        concurrencychecker=None,
        trypending=False,
        try_split=False,
        canonical_parent_order=True,
    ):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.

        `target`: a (KIND, ID) tuple that identifies the content stored in
        this revlog. It helps the rest of the code understand what the revlog
        is about without having to resort to heuristics and index filename
        analysis. Note that this must be reliably set by normal code, but
        test, debug, or performance measurement code might not set it to an
        accurate value.
        """
        self.upperboundcomp = upperboundcomp

        self.radix = radix

        self._docket_file = None
        self._indexfile = None
        self._datafile = None
        self._sidedatafile = None
        self._nodemap_file = None
        self.postfix = postfix
        self._trypending = trypending
        self._try_split = try_split
        self.opener = opener
        if persistentnodemap:
            self._nodemap_file = nodemaputil.get_nodemap_file(self)

        assert target[0] in ALL_KINDS
        assert len(target) == 2
        self.target = target
        if b'feature-config' in self.opener.options:
            self.feature_config = self.opener.options[b'feature-config'].copy()
        else:
            self.feature_config = FeatureConfig()
        self.feature_config.censorable = censorable
        self.feature_config.canonical_parent_order = canonical_parent_order
        if b'data-config' in self.opener.options:
            self.data_config = self.opener.options[b'data-config'].copy()
        else:
            self.data_config = DataConfig()
        self.data_config.check_ambig = checkambig
        self.data_config.mmap_large_index = mmaplargeindex
        if b'delta-config' in self.opener.options:
            self.delta_config = self.opener.options[b'delta-config'].copy()
        else:
            self.delta_config = DeltaConfig()

        # 3-tuple of (node, rev, text) for a raw revision.
        self._revisioncache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, b'')

        self.index = None
        self._docket = None
        self._nodemap_docket = None
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}

        # other optional features

        # Make copy of flag processors so each revlog instance can support
        # custom flags.
        self._flagprocessors = dict(flagutil.flagprocessors)

        # 3-tuple of file handles being used for active writing.
        self._writinghandles = None
        # prevent nesting of addgroup
        self._adding_group = None

        self._loadindex()

        self._concurrencychecker = concurrencychecker

    @property
    def _generaldelta(self):
        """temporary compatibility proxy"""
+        util.nouideprecwarn(
+            b"use revlog.delta_config.general_delta", b"6.6", stacklevel=2
+        )
        return self.delta_config.general_delta

    @property
    def _checkambig(self):
        """temporary compatibility proxy"""
+        util.nouideprecwarn(
+            b"use revlog.data_config.checkambig", b"6.6", stacklevel=2
+        )
        return self.data_config.check_ambig

    @property
    def _mmaplargeindex(self):
        """temporary compatibility proxy"""
+        util.nouideprecwarn(
+            b"use revlog.data_config.mmap_large_index", b"6.6", stacklevel=2
+        )
        return self.data_config.mmap_large_index

    @property
    def _censorable(self):
        """temporary compatibility proxy"""
+        util.nouideprecwarn(
+            b"use revlog.feature_config.censorable", b"6.6", stacklevel=2
+        )
        return self.feature_config.censorable

    @property
    def _chunkcachesize(self):
        """temporary compatibility proxy"""
+        util.nouideprecwarn(
+            b"use revlog.data_config.chunk_cache_size", b"6.6", stacklevel=2
+        )
        return self.data_config.chunk_cache_size

    @property
    def _maxchainlen(self):
        """temporary compatibility proxy"""
+        util.nouideprecwarn(
+            b"use revlog.delta_config.max_chain_len", b"6.6", stacklevel=2
+        )
        return self.delta_config.max_chain_len

    @property
    def _deltabothparents(self):
        """temporary compatibility proxy"""
+        util.nouideprecwarn(
+            b"use revlog.delta_config.delta_both_parents", b"6.6", stacklevel=2
+        )
        return self.delta_config.delta_both_parents

    @property
    def _candidate_group_chunk_size(self):
        """temporary compatibility proxy"""
+        util.nouideprecwarn(
+            b"use revlog.delta_config.candidate_group_chunk_size",
+            b"6.6",
+            stacklevel=2,
+        )
        return self.delta_config.candidate_group_chunk_size

    @property
    def _debug_delta(self):
        """temporary compatibility proxy"""
+        util.nouideprecwarn(
+            b"use revlog.delta_config.debug_delta", b"6.6", stacklevel=2
+        )
        return self.delta_config.debug_delta

    @property
    def _compengine(self):
        """temporary compatibility proxy"""
+        util.nouideprecwarn(
+            b"use revlog.feature_config.compression_engine",
+            b"6.6",
+            stacklevel=2,
+        )
        return self.feature_config.compression_engine

    @property
    def _compengineopts(self):
        """temporary compatibility proxy"""
+        util.nouideprecwarn(
+            b"use revlog.feature_config.compression_engine_options",
+            b"6.6",
+            stacklevel=2,
+        )
        return self.feature_config.compression_engine_options

    @property
    def _maxdeltachainspan(self):
        """temporary compatibility proxy"""
+        util.nouideprecwarn(
+            b"use revlog.delta_config.max_deltachain_span", b"6.6", stacklevel=2
+        )
        return self.delta_config.max_deltachain_span

    @property
    def _withsparseread(self):
        """temporary compatibility proxy"""
+        util.nouideprecwarn(
+            b"use revlog.data_config.with_sparse_read", b"6.6", stacklevel=2
+        )
        return self.data_config.with_sparse_read

    @property
    def _sparserevlog(self):
        """temporary compatibility proxy"""
+        util.nouideprecwarn(
+            b"use revlog.delta_config.sparse_revlog", b"6.6", stacklevel=2
+        )
        return self.delta_config.sparse_revlog

    @property
    def hassidedata(self):
        """temporary compatibility proxy"""
+        util.nouideprecwarn(
+            b"use revlog.feature_config.has_side_data", b"6.6", stacklevel=2
+        )
        return self.feature_config.has_side_data

    @property
    def _srdensitythreshold(self):
        """temporary compatibility proxy"""
+        util.nouideprecwarn(
+            b"use revlog.data_config.sr_density_threshold",
+            b"6.6",
+            stacklevel=2,
+        )
        return self.data_config.sr_density_threshold

    @property
    def _srmingapsize(self):
        """temporary compatibility proxy"""
+        util.nouideprecwarn(
+            b"use revlog.data_config.sr_min_gap_size", b"6.6", stacklevel=2
+        )
        return self.data_config.sr_min_gap_size

    @property
    def _compute_rank(self):
        """temporary compatibility proxy"""
+        util.nouideprecwarn(
+            b"use revlog.feature_config.compute_rank", b"6.6", stacklevel=2
+        )
        return self.feature_config.compute_rank

    @property
    def canonical_parent_order(self):
        """temporary compatibility proxy"""
+        util.nouideprecwarn(
+            b"use revlog.feature_config.canonical_parent_order",
+            b"6.6",
+            stacklevel=2,
+        )
        return self.feature_config.canonical_parent_order

    @property
    def _lazydelta(self):
        """temporary compatibility proxy"""
+        util.nouideprecwarn(
+            b"use revlog.delta_config.lazy_delta", b"6.6", stacklevel=2
+        )
        return self.delta_config.lazy_delta

    @property
    def _lazydeltabase(self):
        """temporary compatibility proxy"""
+        util.nouideprecwarn(
+            b"use revlog.delta_config.lazy_delta_base", b"6.6", stacklevel=2
+        )
        return self.delta_config.lazy_delta_base

    def _init_opts(self):
        """process options (from above/config) to set up the associated default revlog mode

        These values might be affected when actually reading on-disk information.

        The relevant values are returned for use in _loadindex().

        * newversionflags:
            version header to use if we need to create a new revlog

        * mmapindexthreshold:
            minimal index size at which to start using mmap

        * force_nodemap:
            force the usage of a "development" version of the nodemap code
        """
        opts = self.opener.options

        if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
            new_header = CHANGELOGV2
            compute_rank = opts.get(b'changelogv2.compute-rank', True)
            self.feature_config.compute_rank = compute_rank
        elif b'revlogv2' in opts:
            new_header = REVLOGV2
        elif b'revlogv1' in opts:
            new_header = REVLOGV1 | FLAG_INLINE_DATA
            if b'generaldelta' in opts:
                new_header |= FLAG_GENERALDELTA
        elif b'revlogv0' in self.opener.options:
            new_header = REVLOGV0
        else:
            new_header = REVLOG_DEFAULT_VERSION

        mmapindexthreshold = None
        if self.data_config.mmap_large_index:
            mmapindexthreshold = self.data_config.mmap_index_threshold
        if self.feature_config.enable_ellipsis:
            self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

        # revlog v0 doesn't have flag processors
        for flag, processor in opts.get(b'flagprocessors', {}).items():
            flagutil.insertflagprocessor(flag, processor, self._flagprocessors)

        chunk_cache_size = self.data_config.chunk_cache_size
        if chunk_cache_size <= 0:
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not greater than 0')
                % chunk_cache_size
            )
        elif chunk_cache_size & (chunk_cache_size - 1):
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not a power of 2')
                % chunk_cache_size
            )
        force_nodemap = opts.get(b'devel-force-nodemap', False)
        return new_header, mmapindexthreshold, force_nodemap

    def _get_data(self, filepath, mmap_threshold, size=None):
        """return a file content with or without mmap

        If the file is missing, return the empty string"""
        try:
            with self.opener(filepath) as fp:
                if mmap_threshold is not None:
                    file_size = self.opener.fstat(fp).st_size
                    if file_size >= mmap_threshold:
                        if size is not None:
                            # avoid potential mmap crash
                            size = min(file_size, size)
                        # TODO: should .close() to release resources without
                        # relying on Python GC
                        if size is None:
                            return util.buffer(util.mmapread(fp))
                        else:
                            return util.buffer(util.mmapread(fp, size))
                if size is None:
                    return fp.read()
                else:
                    return fp.read(size)
        except FileNotFoundError:
            return b''

    def get_streams(self, max_linkrev, force_inline=False):
        """return a list of streams that represent this revlog

        This is used by stream-clone to do byte-to-byte copies of a repository.

        This streams data for all revisions that refer to a changelog revision up
        to `max_linkrev`.

        If `force_inline` is set, it enforces that the stream will represent an inline revlog.

        It returns a list of three-tuples:

        [
            (filename, bytes_stream, stream_size),
            …
        ]
        """
        n = len(self)
        index = self.index
        while n > 0:
            linkrev = index[n - 1][4]
            if linkrev < max_linkrev:
                break
            # note: this loop will rarely go through multiple iterations, since
            # it only traverses commits created during the current streaming
            # pull operation.
            #
            # If this becomes a problem, using a binary search should cap the
            # runtime of this.
            n = n - 1
        if n == 0:
            # no data to send
            return []
        index_size = n * index.entry_size
        data_size = self.end(n - 1)

        # XXX we might have been split (or stripped) since the object was
        # initialized. We need to close this race too, e.g. by having a way to
        # pre-open the files we feed to the revlog and never closing them
        # before we are done streaming.

        if self._inline:

            def get_stream():
                with self._indexfp() as fp:
                    yield None
                    size = index_size + data_size
                    if size <= 65536:
                        yield fp.read(size)
                    else:
                        yield from util.filechunkiter(fp, limit=size)

            inline_stream = get_stream()
            next(inline_stream)
            return [
                (self._indexfile, inline_stream, index_size + data_size),
            ]
        elif force_inline:

            def get_stream():
                with self.reading():
                    yield None

                    for rev in range(n):
                        idx = self.index.entry_binary(rev)
                        if rev == 0 and self._docket is None:
                            # re-inject the inline flag
                            header = self._format_flags
                            header |= self._format_version
                            header |= FLAG_INLINE_DATA
                            header = self.index.pack_header(header)
                            idx = header + idx
                        yield idx
                        yield self._getsegmentforrevs(rev, rev)[1]

            inline_stream = get_stream()
            next(inline_stream)
            return [
                (self._indexfile, inline_stream, index_size + data_size),
            ]
        else:

            def get_index_stream():
                with self._indexfp() as fp:
                    yield None
                    if index_size <= 65536:
                        yield fp.read(index_size)
                    else:
                        yield from util.filechunkiter(fp, limit=index_size)

            def get_data_stream():
                with self._datafp() as fp:
                    yield None
                    if data_size <= 65536:
                        yield fp.read(data_size)
                    else:
                        yield from util.filechunkiter(fp, limit=data_size)

            index_stream = get_index_stream()
            next(index_stream)
            data_stream = get_data_stream()
            next(data_stream)
            return [
                (self._datafile, data_stream, data_size),
                (self._indexfile, index_stream, index_size),
            ]

786 def _loadindex(self, docket=None):
859 def _loadindex(self, docket=None):
787
860
788 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
861 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
789
862
790 if self.postfix is not None:
863 if self.postfix is not None:
791 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
864 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
792 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
865 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
793 entry_point = b'%s.i.a' % self.radix
866 entry_point = b'%s.i.a' % self.radix
794 elif self._try_split and self.opener.exists(self._split_index_file):
867 elif self._try_split and self.opener.exists(self._split_index_file):
795 entry_point = self._split_index_file
868 entry_point = self._split_index_file
796 else:
869 else:
797 entry_point = b'%s.i' % self.radix
870 entry_point = b'%s.i' % self.radix
798
871
799 if docket is not None:
872 if docket is not None:
800 self._docket = docket
873 self._docket = docket
801 self._docket_file = entry_point
874 self._docket_file = entry_point
802 else:
875 else:
803 self._initempty = True
876 self._initempty = True
804 entry_data = self._get_data(entry_point, mmapindexthreshold)
877 entry_data = self._get_data(entry_point, mmapindexthreshold)
805 if len(entry_data) > 0:
878 if len(entry_data) > 0:
806 header = INDEX_HEADER.unpack(entry_data[:4])[0]
879 header = INDEX_HEADER.unpack(entry_data[:4])[0]
807 self._initempty = False
880 self._initempty = False
808 else:
881 else:
809 header = new_header
882 header = new_header
810
883
811 self._format_flags = header & ~0xFFFF
884 self._format_flags = header & ~0xFFFF
812 self._format_version = header & 0xFFFF
885 self._format_version = header & 0xFFFF
813
886
814 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
887 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
815 if supported_flags is None:
888 if supported_flags is None:
816 msg = _(b'unknown version (%d) in revlog %s')
889 msg = _(b'unknown version (%d) in revlog %s')
817 msg %= (self._format_version, self.display_id)
890 msg %= (self._format_version, self.display_id)
818 raise error.RevlogError(msg)
891 raise error.RevlogError(msg)
819 elif self._format_flags & ~supported_flags:
892 elif self._format_flags & ~supported_flags:
820 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
893 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
821 display_flag = self._format_flags >> 16
894 display_flag = self._format_flags >> 16
822 msg %= (display_flag, self._format_version, self.display_id)
895 msg %= (display_flag, self._format_version, self.display_id)
823 raise error.RevlogError(msg)
896 raise error.RevlogError(msg)
824
897
825 features = FEATURES_BY_VERSION[self._format_version]
898 features = FEATURES_BY_VERSION[self._format_version]
826 self._inline = features[b'inline'](self._format_flags)
899 self._inline = features[b'inline'](self._format_flags)
827 self.delta_config.general_delta = features[b'generaldelta'](
900 self.delta_config.general_delta = features[b'generaldelta'](
828 self._format_flags
901 self._format_flags
829 )
902 )
830 self.feature_config.has_side_data = features[b'sidedata']
903 self.feature_config.has_side_data = features[b'sidedata']
831
904
832 if not features[b'docket']:
905 if not features[b'docket']:
833 self._indexfile = entry_point
906 self._indexfile = entry_point
834 index_data = entry_data
907 index_data = entry_data
835 else:
908 else:
836 self._docket_file = entry_point
909 self._docket_file = entry_point
837 if self._initempty:
910 if self._initempty:
838 self._docket = docketutil.default_docket(self, header)
911 self._docket = docketutil.default_docket(self, header)
839 else:
912 else:
840 self._docket = docketutil.parse_docket(
913 self._docket = docketutil.parse_docket(
                self, entry_data, use_pending=self._trypending
            )

        if self._docket is not None:
            self._indexfile = self._docket.index_filepath()
            index_data = b''
            index_size = self._docket.index_end
            if index_size > 0:
                index_data = self._get_data(
                    self._indexfile, mmapindexthreshold, size=index_size
                )
                if len(index_data) < index_size:
                    msg = _(b'too few index data for %s: got %d, expected %d')
                    msg %= (self.display_id, len(index_data), index_size)
                    raise error.RevlogError(msg)

            self._inline = False
            # generaldelta implied by version 2 revlogs.
            self.delta_config.general_delta = True
            # the logic for persistent nodemap will be dealt with within the
            # main docket, so disable it for now.
            self._nodemap_file = None

        if self._docket is not None:
            self._datafile = self._docket.data_filepath()
            self._sidedatafile = self._docket.sidedata_filepath()
        elif self.postfix is None:
            self._datafile = b'%s.d' % self.radix
        else:
            self._datafile = b'%s.d.%s' % (self.radix, self.postfix)

        self.nodeconstants = sha1nodeconstants
        self.nullid = self.nodeconstants.nullid

        # sparse-revlog can't be on without general-delta (issue6056)
        if not self.delta_config.general_delta:
            self.delta_config.sparse_revlog = False

        self._storedeltachains = True

        devel_nodemap = (
            self._nodemap_file
            and force_nodemap
            and parse_index_v1_nodemap is not None
        )

        use_rust_index = False
        if rustrevlog is not None:
            if self._nodemap_file is not None:
                use_rust_index = True
            else:
                use_rust_index = self.opener.options.get(b'rust.index')

        self._parse_index = parse_index_v1
        if self._format_version == REVLOGV0:
            self._parse_index = revlogv0.parse_index_v0
        elif self._format_version == REVLOGV2:
            self._parse_index = parse_index_v2
        elif self._format_version == CHANGELOGV2:
            self._parse_index = parse_index_cl_v2
        elif devel_nodemap:
            self._parse_index = parse_index_v1_nodemap
        elif use_rust_index:
            self._parse_index = parse_index_v1_mixed
        try:
            d = self._parse_index(index_data, self._inline)
            index, chunkcache = d
            use_nodemap = (
                not self._inline
                and self._nodemap_file is not None
                and hasattr(index, 'update_nodemap_data')
            )
            if use_nodemap:
                nodemap_data = nodemaputil.persisted_data(self)
                if nodemap_data is not None:
                    docket = nodemap_data[0]
                    if (
                        len(d[0]) > docket.tip_rev
                        and d[0][docket.tip_rev][7] == docket.tip_node
                    ):
                        # no changelog tampering
                        self._nodemap_docket = docket
                        index.update_nodemap_data(*nodemap_data)
        except (ValueError, IndexError):
            raise error.RevlogError(
                _(b"index %s is corrupted") % self.display_id
            )
        self.index = index
        self._segmentfile = randomaccessfile.randomaccessfile(
            self.opener,
            (self._indexfile if self._inline else self._datafile),
            self.data_config.chunk_cache_size,
            chunkcache,
        )
        self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
            self.opener,
            self._sidedatafile,
            self.data_config.chunk_cache_size,
        )
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = util.lrucachedict(500)
        # revlog header -> revlog compressor
        self._decompressors = {}

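    # A rough sketch of the contract shared by the ``parse_index_*``
    # callables selected above: each takes the raw index bytes and the
    # inline flag, and returns an ``(index, chunkcache)`` pair.
    #
    #   index, chunkcache = parse_index_v1(index_data, False)
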
    def get_revlog(self):
        """simple function to mirror the API of other not-really-revlog objects"""
        return self

    @util.propertycache
    def revlog_kind(self):
        return self.target[0]

    @util.propertycache
    def display_id(self):
        """The public facing "ID" of the revlog that we use in messages"""
        if self.revlog_kind == KIND_FILELOG:
            # Reference the file without the "data/" prefix, so it is familiar
            # to the user.
            return self.target[1]
        else:
            return self.radix

    def _get_decompressor(self, t):
        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor(
                    self.feature_config.compression_engine_options
                )
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(
                    _(b'unknown compression type %s') % binascii.hexlify(t)
                )
        return compressor

    @util.propertycache
    def _compressor(self):
        engine = util.compengines[self.feature_config.compression_engine]
        return engine.revlogcompressor(
            self.feature_config.compression_engine_options
        )

    @util.propertycache
    def _decompressor(self):
        """the default decompressor"""
        if self._docket is None:
            return None
        t = self._docket.default_compression_header
        c = self._get_decompressor(t)
        return c.decompress

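    # A rough sketch of the decompressor dispatch above, assuming (as the
    # stock engines do) that each compressed chunk starts with a one-byte
    # revlog header identifying its engine:
    #
    #   header = chunk[0:1]  # e.g. b'x' for zlib
    #   rawtext = rl._get_decompressor(header).decompress(chunk)
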
    def _indexfp(self):
        """file object for the revlog's index file"""
        return self.opener(self._indexfile, mode=b"r")

    def __index_write_fp(self):
        # You should not use this directly; use `_writing` instead.
        try:
            f = self.opener(
                self._indexfile,
                mode=b"r+",
                checkambig=self.data_config.check_ambig,
            )
            if self._docket is None:
                f.seek(0, os.SEEK_END)
            else:
                f.seek(self._docket.index_end, os.SEEK_SET)
            return f
        except FileNotFoundError:
            return self.opener(
                self._indexfile,
                mode=b"w+",
                checkambig=self.data_config.check_ambig,
            )

    def __index_new_fp(self):
        # You should not use this unless you are upgrading from an inline
        # revlog.
        return self.opener(
            self._indexfile,
            mode=b"w",
            checkambig=self.data_config.check_ambig,
            atomictemp=True,
        )

    def _datafp(self, mode=b'r'):
        """file object for the revlog's data file"""
        return self.opener(self._datafile, mode=mode)

    @contextlib.contextmanager
    def _sidedatareadfp(self):
        """file object suitable to read sidedata"""
        if self._writinghandles:
            yield self._writinghandles[2]
        else:
            with self.opener(self._sidedatafile) as fp:
                yield fp

    def tiprev(self):
        return len(self.index) - 1

    def tip(self):
        return self.node(self.tiprev())

    def __contains__(self, rev):
        return 0 <= rev < len(self)

    def __len__(self):
        return len(self.index)

    def __iter__(self):
        return iter(range(len(self)))

    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        return storageutil.iterrevs(len(self), start=start, stop=stop)

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def _candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such a flag processor can
        # alter the rawtext content that the delta will be based on, and two
        # clients could have the same revlog node with different flags (i.e.
        # different rawtext contents), making the delta incompatible.
        if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
            self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
        ):
            return False
        return True

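    # Illustration of the check above with made-up bit values (the real
    # masks live in revlogutils.flagutil): any overlap with the
    # rawtext-changing mask vetoes the delta.
    #
    #   RAWTEXT_CHANGING = 0b0110  # hypothetical mask
    #   assert (0b0100 & RAWTEXT_CHANGING) != 0  # this rev vetoes deltas
    #   assert (0b1000 & RAWTEXT_CHANGING) == 0  # this rev is delta-friendly
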
    def update_caches(self, transaction):
        """update the on-disk caches

        If a transaction is passed, the update may be delayed to transaction
        commit."""
        if self._nodemap_file is not None:
            if transaction is None:
                nodemaputil.update_persistent_nodemap(self)
            else:
                nodemaputil.setup_persistent_nodemap(transaction, self)

    def clearcaches(self):
        """Clear in-memory caches"""
        self._revisioncache = None
        self._chainbasecache.clear()
        self._segmentfile.clear_cache()
        self._segmentfile_sidedata.clear_cache()
        self._pcache = {}
        self._nodemap_docket = None
        self.index.clearcaches()
        # The Python code is responsible for validating the docket, so we
        # end up having to refresh it here.
        use_nodemap = (
            not self._inline
            and self._nodemap_file is not None
            and hasattr(self.index, 'update_nodemap_data')
        )
        if use_nodemap:
            nodemap_data = nodemaputil.persisted_data(self)
            if nodemap_data is not None:
                self._nodemap_docket = nodemap_data[0]
                self.index.update_nodemap_data(*nodemap_data)

    def rev(self, node):
        """return the revision number associated with a <nodeid>"""
        try:
            return self.index.rev(node)
        except TypeError:
            raise
        except error.RevlogError:
            # parsers.c radix tree lookup failed
            if (
                node == self.nodeconstants.wdirid
                or node in self.nodeconstants.wdirfilenodeids
            ):
                raise error.WdirUnsupported
            raise error.LookupError(node, self.display_id, _(b'no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

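    # A minimal sketch of the packing described above: the first index
    # field stores ``(offset << 16) | flags``, so ``start`` and ``flags``
    # undo it with a shift and a mask (illustrative values only):
    #
    #   offset_flags = (1234 << 16) | 0x0001
    #   assert offset_flags >> 16 == 1234
    #   assert offset_flags & 0xFFFF == 0x0001
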
    def sidedata_cut_off(self, rev):
        sd_cut_off = self.index[rev][8]
        if sd_cut_off != 0:
            return sd_cut_off
        # This is some annoying dance, because entries without sidedata
        # currently use 0 as their offset (instead of previous-offset +
        # previous-size).
        #
        # We should reconsider this sidedata → 0 sidedata_offset policy.
        # In the meantime, we need this.
        while 0 <= rev:
            e = self.index[rev]
            if e[9] != 0:
                return e[8] + e[9]
            rev -= 1
        return 0

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]

    def sidedata_length(self, rev):
        if not self.feature_config.has_side_data:
            return 0
        return self.index[rev][9]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.rawdata(rev)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev))

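    # Sketch of the mask trick above with made-up bit values: XOR-ing
    # ELLIPSIS out of the known-flags mask leaves it as the one flag that
    # does not force the slow path through ``revision()``:
    #
    #   KNOWN, ELLIPSIS = 0b0111, 0b0010
    #   assert 0b0010 & (KNOWN ^ ELLIPSIS) == 0  # ellipsis only: fast path
    #   assert 0b0100 & (KNOWN ^ ELLIPSIS) != 0  # other flag: slow path
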
    def fast_rank(self, rev):
        """Return the rank of a revision if already known, or None otherwise.

        The rank of a revision is the size of the sub-graph it defines as a
        head. Equivalently, the rank of a revision `r` is the size of the set
        `ancestors(r)`, `r` included.

        This method returns the rank retrieved from the revlog in constant
        time. It makes no attempt at computing unknown values for versions of
        the revlog which do not persist the rank.
        """
        rank = self.index[rev][ENTRY_RANK]
        if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
            return None
        if rev == nullrev:
            return 0  # convention
        return rank

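    # Worked example of the definition above: in a linear revlog
    # 0 <- 1 <- 2, ancestors(2) is {0, 1, 2}, so the rank of rev 2 is 3.
    # Only CHANGELOGV2 revlogs persist this value in the ENTRY_RANK field;
    # everywhere else ``fast_rank`` deliberately returns None.
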
    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

        if self.feature_config.canonical_parent_order and entry[5] == nullrev:
            return entry[6], entry[5]
        else:
            return entry[5], entry[6]

    # fast parentrevs(rev) where rev isn't filtered
    _uncheckedparentrevs = parentrevs

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        # inline node() to avoid function call overhead
        if self.feature_config.canonical_parent_order and d[5] == nullrev:
            return i[d[6]][7], i[d[5]][7]
        else:
            return i[d[5]][7], i[d[6]][7]

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self.delta_config.general_delta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        generaldelta = self.delta_config.general_delta
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped

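    # Usage sketch (hypothetical revlog ``rl`` where rev 2 is a full
    # snapshot and revs 3 and 4 are deltas chained on top of it):
    #
    #   chain, stopped = rl._deltachain(4)
    #   # chain == [2, 3, 4], stopped == False
    #   chain, stopped = rl._deltachain(4, stoprev=3)
    #   # chain == [4], stopped == True (stoprev itself is excluded)
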
    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse revision order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        # first, make sure start revisions aren't filtered
        revs = list(revs)
        checkrev = self.node
        for r in revs:
            checkrev(r)
        # and we're sure ancestors aren't filtered as well

        if rustancestor is not None and self.index.rust_ext_compat:
            lazyancestors = rustancestor.LazyAncestors
            arg = self.index
        else:
            lazyancestors = ancestor.lazyancestors
            arg = self._uncheckedparentrevs
        return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)

    def descendants(self, revs):
        return dagop.descendantrevs(revs, self.revs, self.parentrevs)

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset:
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

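    # Reading of the revset identity above (hypothetical nodes C and H):
    # ``findcommonmissing([C], [H])`` returns the lazy set ``::C`` (plus
    # nullrev) and the topologically sorted nodes of ``(::H) - (::C)``,
    # i.e. everything H has that C does not.
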
    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        if rustancestor is not None and self.index.rust_ext_compat:
            return rustancestor.MissingAncestors(self.index, common)
        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullrev."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [self.nullid]  # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return (
                [self.node(r) for r in self],
                [self.nullid],
                list(self.heads()),
            )
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == self.nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n)  # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update(
                            [p for p in self.parents(n) if p != self.nullid]
                        )
                    elif n in heads:  # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [self.nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents is a
                # descendant. (We seeded the descendants set with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in heads.items() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

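    # Usage sketch (hypothetical nodes A and B, with A an ancestor of B):
    #
    #   nodes, outroots, outheads = rl.nodesbetween([A], [B])
    #   # nodes    : the topologically sorted path 'A::B'
    #   # outroots : the subset of [A] reachable in 'nodes', here [A]
    #   # outheads : the subset of [B] reachable from the roots, here [B]
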
    def headrevs(self, revs=None):
        if revs is None:
            try:
                return self.index.headrevs()
            except AttributeError:
                return self._headrevs()
        if rustdagop is not None and self.index.rust_ext_compat:
            return rustdagop.headrevs(self.index, revs)
        return dagop.headrevs(revs, self._uncheckedparentrevs)

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iter over filtered revs, so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]

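    # A self-contained sketch of the same marking pass, assuming only a
    # parentrevs-like callable (hypothetical helper, not part of this
    # class). Note that ``_headrevs`` allocates ``count + 1`` slots so that
    # a ``nullrev`` (-1) parent index harmlessly clears the extra slot:
    #
    #   def naive_headrevs(nrevs, parentrevs):
    #       ishead = [True] * nrevs
    #       for r in range(nrevs):
    #           for p in parentrevs(r):
    #               if p != -1:
    #                   ishead[p] = False
    #       return [r for r in range(nrevs) if ishead[r]]
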
    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [self.nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = {self.rev(n) for n in stop or []}

        revs = dagop.headrevssubset(
            self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
        )

        return [self.node(rev) for rev in revs]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError):  # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        reachableroots is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return bool(self.reachableroots(a, [b], [a], includepath=False))

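    # The early exits in ``isancestorrev`` lean on the fact that revision
    # numbers are a topological order: a parent always has a smaller rev
    # than its children, so ``a > b`` already rules out ``a`` being an
    # ancestor of ``b``.
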
1747 def reachableroots(self, minroot, heads, roots, includepath=False):
1820 def reachableroots(self, minroot, heads, roots, includepath=False):
1748 """return (heads(::(<roots> and <roots>::<heads>)))
1821 """return (heads(::(<roots> and <roots>::<heads>)))
1749
1822
1750 If includepath is True, return (<roots>::<heads>)."""
1823 If includepath is True, return (<roots>::<heads>)."""
1751 try:
1824 try:
1752 return self.index.reachableroots2(
1825 return self.index.reachableroots2(
1753 minroot, heads, roots, includepath
1826 minroot, heads, roots, includepath
1754 )
1827 )
1755 except AttributeError:
1828 except AttributeError:
1756 return dagop._reachablerootspure(
1829 return dagop._reachablerootspure(
1757 self.parentrevs, minroot, roots, heads, includepath
1830 self.parentrevs, minroot, roots, heads, includepath
1758 )
1831 )
1759
1832
1760 def ancestor(self, a, b):
1833 def ancestor(self, a, b):
1761 """calculate the "best" common ancestor of nodes a and b"""
1834 """calculate the "best" common ancestor of nodes a and b"""
1762
1835
1763 a, b = self.rev(a), self.rev(b)
1836 a, b = self.rev(a), self.rev(b)
1764 try:
1837 try:
1765 ancs = self.index.ancestors(a, b)
1838 ancs = self.index.ancestors(a, b)
1766 except (AttributeError, OverflowError):
1839 except (AttributeError, OverflowError):
1767 ancs = ancestor.ancestors(self.parentrevs, a, b)
1840 ancs = ancestor.ancestors(self.parentrevs, a, b)
1768 if ancs:
1841 if ancs:
1769 # choose a consistent winner when there's a tie
1842 # choose a consistent winner when there's a tie
1770 return min(map(self.node, ancs))
1843 return min(map(self.node, ancs))
1771 return self.nullid
1844 return self.nullid
1772
1845
1773 def _match(self, id):
1846 def _match(self, id):
1774 if isinstance(id, int):
1847 if isinstance(id, int):
1775 # rev
1848 # rev
1776 return self.node(id)
1849 return self.node(id)
1777 if len(id) == self.nodeconstants.nodelen:
1850 if len(id) == self.nodeconstants.nodelen:
1778 # possibly a binary node
1851 # possibly a binary node
1779 # odds of a binary node being all hex in ASCII are 1 in 10**25
1852 # odds of a binary node being all hex in ASCII are 1 in 10**25
1780 try:
1853 try:
1781 node = id
1854 node = id
1782 self.rev(node) # quick search the index
1855 self.rev(node) # quick search the index
1783 return node
1856 return node
1784 except error.LookupError:
1857 except error.LookupError:
1785 pass # may be partial hex id
1858 pass # may be partial hex id
1786 try:
1859 try:
1787 # str(rev)
1860 # str(rev)
1788 rev = int(id)
1861 rev = int(id)
1789 if b"%d" % rev != id:
1862 if b"%d" % rev != id:
1790 raise ValueError
1863 raise ValueError
1791 if rev < 0:
1864 if rev < 0:
1792 rev = len(self) + rev
1865 rev = len(self) + rev
1793 if rev < 0 or rev >= len(self):
1866 if rev < 0 or rev >= len(self):
1794 raise ValueError
1867 raise ValueError
1795 return self.node(rev)
1868 return self.node(rev)
1796 except (ValueError, OverflowError):
1869 except (ValueError, OverflowError):
1797 pass
1870 pass
1798 if len(id) == 2 * self.nodeconstants.nodelen:
1871 if len(id) == 2 * self.nodeconstants.nodelen:
1799 try:
1872 try:
1800 # a full hex nodeid?
1873 # a full hex nodeid?
1801 node = bin(id)
1874 node = bin(id)
1802 self.rev(node)
1875 self.rev(node)
1803 return node
1876 return node
1804 except (binascii.Error, error.LookupError):
1877 except (binascii.Error, error.LookupError):
1805 pass
1878 pass
1806
1879
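# Editorial sketch, not part of the diff: the id forms _match() above
# resolves (`rl` and the values are hypothetical; lengths assume SHA-1):
#
#     rl._match(5)        # int rev             -> rl.node(5)
#     rl._match(b'-1')    # str(rev) bytes      -> rl.node(len(rl) - 1)
#     rl._match(node20)   # 20-byte binary node -> node20, if indexed
#     rl._match(hex40)    # full 40-digit hex   -> bin(hex40), if indexed
#
# Anything else falls through to None, leaving prefix resolution to
# _partialmatch() below.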
1807 def _partialmatch(self, id):
1880 def _partialmatch(self, id):
1808 # we don't care about wdirfilenodeids as they should always be full hashes
1881 # we don't care about wdirfilenodeids as they should always be full hashes
1809 maybewdir = self.nodeconstants.wdirhex.startswith(id)
1882 maybewdir = self.nodeconstants.wdirhex.startswith(id)
1810 ambiguous = False
1883 ambiguous = False
1811 try:
1884 try:
1812 partial = self.index.partialmatch(id)
1885 partial = self.index.partialmatch(id)
1813 if partial and self.hasnode(partial):
1886 if partial and self.hasnode(partial):
1814 if maybewdir:
1887 if maybewdir:
1815 # single 'ff...' match in radix tree, ambiguous with wdir
1888 # single 'ff...' match in radix tree, ambiguous with wdir
1816 ambiguous = True
1889 ambiguous = True
1817 else:
1890 else:
1818 return partial
1891 return partial
1819 elif maybewdir:
1892 elif maybewdir:
1820 # no 'ff...' match in radix tree, wdir identified
1893 # no 'ff...' match in radix tree, wdir identified
1821 raise error.WdirUnsupported
1894 raise error.WdirUnsupported
1822 else:
1895 else:
1823 return None
1896 return None
1824 except error.RevlogError:
1897 except error.RevlogError:
1825 # parsers.c radix tree lookup gave multiple matches
1898 # parsers.c radix tree lookup gave multiple matches
1826 # fast path: for unfiltered changelog, radix tree is accurate
1899 # fast path: for unfiltered changelog, radix tree is accurate
1827 if not getattr(self, 'filteredrevs', None):
1900 if not getattr(self, 'filteredrevs', None):
1828 ambiguous = True
1901 ambiguous = True
1829 # fall through to slow path that filters hidden revisions
1902 # fall through to slow path that filters hidden revisions
1830 except (AttributeError, ValueError):
1903 except (AttributeError, ValueError):
1831 # we are pure python, or key is not hex
1904 # we are pure python, or key is not hex
1832 pass
1905 pass
1833 if ambiguous:
1906 if ambiguous:
1834 raise error.AmbiguousPrefixLookupError(
1907 raise error.AmbiguousPrefixLookupError(
1835 id, self.display_id, _(b'ambiguous identifier')
1908 id, self.display_id, _(b'ambiguous identifier')
1836 )
1909 )
1837
1910
1838 if id in self._pcache:
1911 if id in self._pcache:
1839 return self._pcache[id]
1912 return self._pcache[id]
1840
1913
1841 if len(id) <= 40:
1914 if len(id) <= 40:
1842 # hex(node)[:...]
1915 # hex(node)[:...]
1843 l = len(id) // 2 * 2 # grab an even number of digits
1916 l = len(id) // 2 * 2 # grab an even number of digits
1844 try:
1917 try:
1845 # we're dropping the last digit, so let's check that it's hex,
1918 # we're dropping the last digit, so let's check that it's hex,
1846 # to avoid the expensive computation below if it's not
1919 # to avoid the expensive computation below if it's not
1847 if len(id) % 2 > 0:
1920 if len(id) % 2 > 0:
1848 if not (id[-1] in hexdigits):
1921 if not (id[-1] in hexdigits):
1849 return None
1922 return None
1850 prefix = bin(id[:l])
1923 prefix = bin(id[:l])
1851 except binascii.Error:
1924 except binascii.Error:
1852 pass
1925 pass
1853 else:
1926 else:
1854 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1927 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1855 nl = [
1928 nl = [
1856 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1929 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1857 ]
1930 ]
1858 if self.nodeconstants.nullhex.startswith(id):
1931 if self.nodeconstants.nullhex.startswith(id):
1859 nl.append(self.nullid)
1932 nl.append(self.nullid)
1860 if len(nl) > 0:
1933 if len(nl) > 0:
1861 if len(nl) == 1 and not maybewdir:
1934 if len(nl) == 1 and not maybewdir:
1862 self._pcache[id] = nl[0]
1935 self._pcache[id] = nl[0]
1863 return nl[0]
1936 return nl[0]
1864 raise error.AmbiguousPrefixLookupError(
1937 raise error.AmbiguousPrefixLookupError(
1865 id, self.display_id, _(b'ambiguous identifier')
1938 id, self.display_id, _(b'ambiguous identifier')
1866 )
1939 )
1867 if maybewdir:
1940 if maybewdir:
1868 raise error.WdirUnsupported
1941 raise error.WdirUnsupported
1869 return None
1942 return None
1870
1943
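# Editorial sketch, not part of the diff: possible outcomes of
# _partialmatch() above (values hypothetical):
#
#     rl._partialmatch(b'1a2b')  # unique prefix -> full binary node
#     rl._partialmatch(b'ff')    # all-'f' prefix with no real match
#                                #   -> raises error.WdirUnsupported
#     rl._partialmatch(b'0a')    # prefix shared by several nodes
#                                #   -> raises error.AmbiguousPrefixLookupError
#     rl._partialmatch(b'9z9z')  # no match at all -> None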
1871 def lookup(self, id):
1944 def lookup(self, id):
1872 """locate a node based on:
1945 """locate a node based on:
1873 - revision number or str(revision number)
1946 - revision number or str(revision number)
1874 - nodeid or subset of hex nodeid
1947 - nodeid or subset of hex nodeid
1875 """
1948 """
1876 n = self._match(id)
1949 n = self._match(id)
1877 if n is not None:
1950 if n is not None:
1878 return n
1951 return n
1879 n = self._partialmatch(id)
1952 n = self._partialmatch(id)
1880 if n:
1953 if n:
1881 return n
1954 return n
1882
1955
1883 raise error.LookupError(id, self.display_id, _(b'no match found'))
1956 raise error.LookupError(id, self.display_id, _(b'no match found'))
1884
1957
1885 def shortest(self, node, minlength=1):
1958 def shortest(self, node, minlength=1):
1886 """Find the shortest unambiguous prefix that matches node."""
1959 """Find the shortest unambiguous prefix that matches node."""
1887
1960
1888 def isvalid(prefix):
1961 def isvalid(prefix):
1889 try:
1962 try:
1890 matchednode = self._partialmatch(prefix)
1963 matchednode = self._partialmatch(prefix)
1891 except error.AmbiguousPrefixLookupError:
1964 except error.AmbiguousPrefixLookupError:
1892 return False
1965 return False
1893 except error.WdirUnsupported:
1966 except error.WdirUnsupported:
1894 # single 'ff...' match
1967 # single 'ff...' match
1895 return True
1968 return True
1896 if matchednode is None:
1969 if matchednode is None:
1897 raise error.LookupError(node, self.display_id, _(b'no node'))
1970 raise error.LookupError(node, self.display_id, _(b'no node'))
1898 return True
1971 return True
1899
1972
1900 def maybewdir(prefix):
1973 def maybewdir(prefix):
1901 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1974 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1902
1975
1903 hexnode = hex(node)
1976 hexnode = hex(node)
1904
1977
1905 def disambiguate(hexnode, minlength):
1978 def disambiguate(hexnode, minlength):
1906 """Disambiguate against wdirid."""
1979 """Disambiguate against wdirid."""
1907 for length in range(minlength, len(hexnode) + 1):
1980 for length in range(minlength, len(hexnode) + 1):
1908 prefix = hexnode[:length]
1981 prefix = hexnode[:length]
1909 if not maybewdir(prefix):
1982 if not maybewdir(prefix):
1910 return prefix
1983 return prefix
1911
1984
1912 if not getattr(self, 'filteredrevs', None):
1985 if not getattr(self, 'filteredrevs', None):
1913 try:
1986 try:
1914 length = max(self.index.shortest(node), minlength)
1987 length = max(self.index.shortest(node), minlength)
1915 return disambiguate(hexnode, length)
1988 return disambiguate(hexnode, length)
1916 except error.RevlogError:
1989 except error.RevlogError:
1917 if node != self.nodeconstants.wdirid:
1990 if node != self.nodeconstants.wdirid:
1918 raise error.LookupError(
1991 raise error.LookupError(
1919 node, self.display_id, _(b'no node')
1992 node, self.display_id, _(b'no node')
1920 )
1993 )
1921 except AttributeError:
1994 except AttributeError:
1922 # Fall through to pure code
1995 # Fall through to pure code
1923 pass
1996 pass
1924
1997
1925 if node == self.nodeconstants.wdirid:
1998 if node == self.nodeconstants.wdirid:
1926 for length in range(minlength, len(hexnode) + 1):
1999 for length in range(minlength, len(hexnode) + 1):
1927 prefix = hexnode[:length]
2000 prefix = hexnode[:length]
1928 if isvalid(prefix):
2001 if isvalid(prefix):
1929 return prefix
2002 return prefix
1930
2003
1931 for length in range(minlength, len(hexnode) + 1):
2004 for length in range(minlength, len(hexnode) + 1):
1932 prefix = hexnode[:length]
2005 prefix = hexnode[:length]
1933 if isvalid(prefix):
2006 if isvalid(prefix):
1934 return disambiguate(hexnode, length)
2007 return disambiguate(hexnode, length)
1935
2008
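# Editorial sketch, not part of the diff: shortest() above grows the
# prefix from `minlength` until it is unambiguous, and additionally skips
# all-'f' prefixes that could be mistaken for the virtual working
# directory id. Hypothetical behaviour:
#
#     rl.shortest(node)               # e.g. b'1a2b'
#     rl.shortest(node, minlength=6)  # never shorter than six digits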
1936 def cmp(self, node, text):
2009 def cmp(self, node, text):
1937 """compare text with a given file revision
2010 """compare text with a given file revision
1938
2011
1939 returns True if text is different from what is stored.
2012 returns True if text is different from what is stored.
1940 """
2013 """
1941 p1, p2 = self.parents(node)
2014 p1, p2 = self.parents(node)
1942 return storageutil.hashrevisionsha1(text, p1, p2) != node
2015 return storageutil.hashrevisionsha1(text, p1, p2) != node
1943
2016
1944 def _getsegmentforrevs(self, startrev, endrev):
2017 def _getsegmentforrevs(self, startrev, endrev):
1945 """Obtain a segment of raw data corresponding to a range of revisions.
2018 """Obtain a segment of raw data corresponding to a range of revisions.
1946
2019
1947 Accepts the start and end revisions. Reads are served through the
2020 Accepts the start and end revisions. Reads are served through the
1948 revlog's cached segment file, so no caller-supplied file handle is
2021 revlog's cached segment file, so no caller-supplied file handle is
1949 involved and no seek position needs to be preserved.
2022 involved and no seek position needs to be preserved.
1950
2023
1951 Requests for data may be satisfied by a cache.
2024 Requests for data may be satisfied by a cache.
1952
2025
1953 Returns a 2-tuple of (offset, data) for the requested range of
2026 Returns a 2-tuple of (offset, data) for the requested range of
1954 revisions. Offset is the integer offset from the beginning of the
2027 revisions. Offset is the integer offset from the beginning of the
1955 revlog and data is a str or buffer of the raw byte data.
2028 revlog and data is a str or buffer of the raw byte data.
1956
2029
1957 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
2030 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1958 to determine where each revision's data begins and ends.
2031 to determine where each revision's data begins and ends.
1959 """
2032 """
1960 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
2033 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1961 # (functions are expensive).
2034 # (functions are expensive).
1962 index = self.index
2035 index = self.index
1963 istart = index[startrev]
2036 istart = index[startrev]
1964 start = int(istart[0] >> 16)
2037 start = int(istart[0] >> 16)
1965 if startrev == endrev:
2038 if startrev == endrev:
1966 end = start + istart[1]
2039 end = start + istart[1]
1967 else:
2040 else:
1968 iend = index[endrev]
2041 iend = index[endrev]
1969 end = int(iend[0] >> 16) + iend[1]
2042 end = int(iend[0] >> 16) + iend[1]
1970
2043
1971 if self._inline:
2044 if self._inline:
1972 start += (startrev + 1) * self.index.entry_size
2045 start += (startrev + 1) * self.index.entry_size
1973 end += (endrev + 1) * self.index.entry_size
2046 end += (endrev + 1) * self.index.entry_size
1974 length = end - start
2047 length = end - start
1975
2048
1976 return start, self._segmentfile.read_chunk(start, length)
2049 return start, self._segmentfile.read_chunk(start, length)
1977
2050
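# Editorial sketch, not part of the diff: locating one revision's chunk
# inside the (offset, data) segment returned above, mirroring what
# _chunks() below does (inline revlogs shift every chunk by the size of
# the interleaved index entries):
#
#     offset, data = rl._getsegmentforrevs(firstrev, lastrev)
#     chunkstart = rl.start(rev)  # for some rev in [firstrev, lastrev]
#     if rl._inline:
#         chunkstart += (rev + 1) * rl.index.entry_size
#     chunk = data[chunkstart - offset:chunkstart - offset + rl.length(rev)]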
1978 def _chunk(self, rev):
2051 def _chunk(self, rev):
1979 """Obtain a single decompressed chunk for a revision.
2052 """Obtain a single decompressed chunk for a revision.
1980
2053
1981 Accepts an integer revision. Reads are served through the revlog's
2054 Accepts an integer revision. Reads are served through the revlog's
1982 cached segment file, so no caller-supplied file handle is involved
2055 cached segment file, so no caller-supplied file handle is involved
1983 and no seek position needs to be preserved.
2056 and no seek position needs to be preserved.
1984
2057
1985 Returns a str holding uncompressed data for the requested revision.
2058 Returns a str holding uncompressed data for the requested revision.
1986 """
2059 """
1987 compression_mode = self.index[rev][10]
2060 compression_mode = self.index[rev][10]
1988 data = self._getsegmentforrevs(rev, rev)[1]
2061 data = self._getsegmentforrevs(rev, rev)[1]
1989 if compression_mode == COMP_MODE_PLAIN:
2062 if compression_mode == COMP_MODE_PLAIN:
1990 return data
2063 return data
1991 elif compression_mode == COMP_MODE_DEFAULT:
2064 elif compression_mode == COMP_MODE_DEFAULT:
1992 return self._decompressor(data)
2065 return self._decompressor(data)
1993 elif compression_mode == COMP_MODE_INLINE:
2066 elif compression_mode == COMP_MODE_INLINE:
1994 return self.decompress(data)
2067 return self.decompress(data)
1995 else:
2068 else:
1996 msg = b'unknown compression mode %d'
2069 msg = b'unknown compression mode %d'
1997 msg %= compression_mode
2070 msg %= compression_mode
1998 raise error.RevlogError(msg)
2071 raise error.RevlogError(msg)
1999
2072
2000 def _chunks(self, revs, targetsize=None):
2073 def _chunks(self, revs, targetsize=None):
2001 """Obtain decompressed chunks for the specified revisions.
2074 """Obtain decompressed chunks for the specified revisions.
2002
2075
2003 Accepts an iterable of numeric revisions that are assumed to be in
2076 Accepts an iterable of numeric revisions that are assumed to be in
2004 ascending order. Reads are served through the revlog's cached segment
2077 ascending order. Reads are served through the revlog's cached segment
2005 file, so no caller-supplied file handle is involved and no seek
2078 file, so no caller-supplied file handle is involved and no seek
2006 position needs to be preserved.
2079 position needs to be preserved.
2007
2080
2008 This function is similar to calling ``self._chunk()`` multiple times,
2081 This function is similar to calling ``self._chunk()`` multiple times,
2009 but is faster.
2082 but is faster.
2010
2083
2011 Returns a list with decompressed data for each requested revision.
2084 Returns a list with decompressed data for each requested revision.
2012 """
2085 """
2013 if not revs:
2086 if not revs:
2014 return []
2087 return []
2015 start = self.start
2088 start = self.start
2016 length = self.length
2089 length = self.length
2017 inline = self._inline
2090 inline = self._inline
2018 iosize = self.index.entry_size
2091 iosize = self.index.entry_size
2019 buffer = util.buffer
2092 buffer = util.buffer
2020
2093
2021 l = []
2094 l = []
2022 ladd = l.append
2095 ladd = l.append
2023
2096
2024 if not self.data_config.with_sparse_read:
2097 if not self.data_config.with_sparse_read:
2025 slicedchunks = (revs,)
2098 slicedchunks = (revs,)
2026 else:
2099 else:
2027 slicedchunks = deltautil.slicechunk(
2100 slicedchunks = deltautil.slicechunk(
2028 self, revs, targetsize=targetsize
2101 self, revs, targetsize=targetsize
2029 )
2102 )
2030
2103
2031 for revschunk in slicedchunks:
2104 for revschunk in slicedchunks:
2032 firstrev = revschunk[0]
2105 firstrev = revschunk[0]
2033 # Skip trailing revisions with empty diff
2106 # Skip trailing revisions with empty diff
2034 for lastrev in revschunk[::-1]:
2107 for lastrev in revschunk[::-1]:
2035 if length(lastrev) != 0:
2108 if length(lastrev) != 0:
2036 break
2109 break
2037
2110
2038 try:
2111 try:
2039 offset, data = self._getsegmentforrevs(firstrev, lastrev)
2112 offset, data = self._getsegmentforrevs(firstrev, lastrev)
2040 except OverflowError:
2113 except OverflowError:
2041 # issue4215 - we can't cache a run of chunks greater than
2114 # issue4215 - we can't cache a run of chunks greater than
2042 # 2G on Windows
2115 # 2G on Windows
2043 return [self._chunk(rev) for rev in revschunk]
2116 return [self._chunk(rev) for rev in revschunk]
2044
2117
2045 decomp = self.decompress
2118 decomp = self.decompress
2046 # self._decompressor might be None, but will not be used in that case
2119 # self._decompressor might be None, but will not be used in that case
2047 def_decomp = self._decompressor
2120 def_decomp = self._decompressor
2048 for rev in revschunk:
2121 for rev in revschunk:
2049 chunkstart = start(rev)
2122 chunkstart = start(rev)
2050 if inline:
2123 if inline:
2051 chunkstart += (rev + 1) * iosize
2124 chunkstart += (rev + 1) * iosize
2052 chunklength = length(rev)
2125 chunklength = length(rev)
2053 comp_mode = self.index[rev][10]
2126 comp_mode = self.index[rev][10]
2054 c = buffer(data, chunkstart - offset, chunklength)
2127 c = buffer(data, chunkstart - offset, chunklength)
2055 if comp_mode == COMP_MODE_PLAIN:
2128 if comp_mode == COMP_MODE_PLAIN:
2056 ladd(c)
2129 ladd(c)
2057 elif comp_mode == COMP_MODE_INLINE:
2130 elif comp_mode == COMP_MODE_INLINE:
2058 ladd(decomp(c))
2131 ladd(decomp(c))
2059 elif comp_mode == COMP_MODE_DEFAULT:
2132 elif comp_mode == COMP_MODE_DEFAULT:
2060 ladd(def_decomp(c))
2133 ladd(def_decomp(c))
2061 else:
2134 else:
2062 msg = b'unknown compression mode %d'
2135 msg = b'unknown compression mode %d'
2063 msg %= comp_mode
2136 msg %= comp_mode
2064 raise error.RevlogError(msg)
2137 raise error.RevlogError(msg)
2065
2138
2066 return l
2139 return l
2067
2140
2068 def deltaparent(self, rev):
2141 def deltaparent(self, rev):
2069 """return deltaparent of the given revision"""
2142 """return deltaparent of the given revision"""
2070 base = self.index[rev][3]
2143 base = self.index[rev][3]
2071 if base == rev:
2144 if base == rev:
2072 return nullrev
2145 return nullrev
2073 elif self.delta_config.general_delta:
2146 elif self.delta_config.general_delta:
2074 return base
2147 return base
2075 else:
2148 else:
2076 return rev - 1
2149 return rev - 1
2077
2150
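# Editorial summary, not part of the diff: the three cases deltaparent()
# above distinguishes:
#
#     base == rev       -> nullrev   (full snapshot, no delta parent)
#     general delta     -> base      (delta base is stored per index entry)
#     legacy layout     -> rev - 1   (deltas always chain to the previous
#                                     on-disk revision)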
2078 def issnapshot(self, rev):
2151 def issnapshot(self, rev):
2079 """tells whether rev is a snapshot"""
2152 """tells whether rev is a snapshot"""
2080 if not self.delta_config.sparse_revlog:
2153 if not self.delta_config.sparse_revlog:
2081 return self.deltaparent(rev) == nullrev
2154 return self.deltaparent(rev) == nullrev
2082 elif hasattr(self.index, 'issnapshot'):
2155 elif hasattr(self.index, 'issnapshot'):
2083 # directly assign the method to cache both the hasattr test and the access
2156 # directly assign the method to cache both the hasattr test and the access
2084 self.issnapshot = self.index.issnapshot
2157 self.issnapshot = self.index.issnapshot
2085 return self.issnapshot(rev)
2158 return self.issnapshot(rev)
2086 if rev == nullrev:
2159 if rev == nullrev:
2087 return True
2160 return True
2088 entry = self.index[rev]
2161 entry = self.index[rev]
2089 base = entry[3]
2162 base = entry[3]
2090 if base == rev:
2163 if base == rev:
2091 return True
2164 return True
2092 if base == nullrev:
2165 if base == nullrev:
2093 return True
2166 return True
2094 p1 = entry[5]
2167 p1 = entry[5]
2095 while self.length(p1) == 0:
2168 while self.length(p1) == 0:
2096 b = self.deltaparent(p1)
2169 b = self.deltaparent(p1)
2097 if b == p1:
2170 if b == p1:
2098 break
2171 break
2099 p1 = b
2172 p1 = b
2100 p2 = entry[6]
2173 p2 = entry[6]
2101 while self.length(p2) == 0:
2174 while self.length(p2) == 0:
2102 b = self.deltaparent(p2)
2175 b = self.deltaparent(p2)
2103 if b == p2:
2176 if b == p2:
2104 break
2177 break
2105 p2 = b
2178 p2 = b
2106 if base == p1 or base == p2:
2179 if base == p1 or base == p2:
2107 return False
2180 return False
2108 return self.issnapshot(base)
2181 return self.issnapshot(base)
2109
2182
2110 def snapshotdepth(self, rev):
2183 def snapshotdepth(self, rev):
2111 """number of snapshots in the chain before this one"""
2184 """number of snapshots in the chain before this one"""
2112 if not self.issnapshot(rev):
2185 if not self.issnapshot(rev):
2113 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
2186 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
2114 return len(self._deltachain(rev)[0]) - 1
2187 return len(self._deltachain(rev)[0]) - 1
2115
2188
2116 def revdiff(self, rev1, rev2):
2189 def revdiff(self, rev1, rev2):
2117 """return or calculate a delta between two revisions
2190 """return or calculate a delta between two revisions
2118
2191
2119 The delta calculated is in binary form and is intended to be written to
2192 The delta calculated is in binary form and is intended to be written to
2120 revlog data directly. So this function needs raw revision data.
2193 revlog data directly. So this function needs raw revision data.
2121 """
2194 """
2122 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
2195 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
2123 return bytes(self._chunk(rev2))
2196 return bytes(self._chunk(rev2))
2124
2197
2125 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
2198 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
2126
2199
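# Editorial sketch, not part of the diff: revdiff() above returns the
# stored delta verbatim when rev2 was delta'd against rev1, and otherwise
# recomputes one from the raw texts. Either way the delta round-trips
# (names hypothetical; mdiff.patch applies a single binary delta):
#
#     delta = rl.revdiff(r1, r2)
#     assert mdiff.patch(rl.rawdata(r1), delta) == rl.rawdata(r2)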
2127 def revision(self, nodeorrev):
2200 def revision(self, nodeorrev):
2128 """return an uncompressed revision of a given node or revision
2201 """return an uncompressed revision of a given node or revision
2129 number.
2202 number.
2130 """
2203 """
2131 return self._revisiondata(nodeorrev)
2204 return self._revisiondata(nodeorrev)
2132
2205
2133 def sidedata(self, nodeorrev):
2206 def sidedata(self, nodeorrev):
2134 """a map of extra data related to the changeset but not part of the hash
2207 """a map of extra data related to the changeset but not part of the hash
2135
2208
2136 This function currently returns a dictionary. However, a more advanced
2209 This function currently returns a dictionary. However, a more advanced
2137 mapping object will likely be used in the future for more
2210 mapping object will likely be used in the future for more
2138 efficient/lazy code.
2211 efficient/lazy code.
2139 """
2212 """
2140 # deal with <nodeorrev> argument type
2213 # deal with <nodeorrev> argument type
2141 if isinstance(nodeorrev, int):
2214 if isinstance(nodeorrev, int):
2142 rev = nodeorrev
2215 rev = nodeorrev
2143 else:
2216 else:
2144 rev = self.rev(nodeorrev)
2217 rev = self.rev(nodeorrev)
2145 return self._sidedata(rev)
2218 return self._sidedata(rev)
2146
2219
2147 def _revisiondata(self, nodeorrev, raw=False):
2220 def _revisiondata(self, nodeorrev, raw=False):
2148 # deal with <nodeorrev> argument type
2221 # deal with <nodeorrev> argument type
2149 if isinstance(nodeorrev, int):
2222 if isinstance(nodeorrev, int):
2150 rev = nodeorrev
2223 rev = nodeorrev
2151 node = self.node(rev)
2224 node = self.node(rev)
2152 else:
2225 else:
2153 node = nodeorrev
2226 node = nodeorrev
2154 rev = None
2227 rev = None
2155
2228
2156 # fast path the special `nullid` rev
2229 # fast path the special `nullid` rev
2157 if node == self.nullid:
2230 if node == self.nullid:
2158 return b""
2231 return b""
2159
2232
2160 # ``rawtext`` is the text as stored inside the revlog. Might be the
2233 # ``rawtext`` is the text as stored inside the revlog. Might be the
2161 # revision or might need to be processed to retrieve the revision.
2234 # revision or might need to be processed to retrieve the revision.
2162 rev, rawtext, validated = self._rawtext(node, rev)
2235 rev, rawtext, validated = self._rawtext(node, rev)
2163
2236
2164 if raw and validated:
2237 if raw and validated:
2165 # if we don't want to process the raw text and the raw
2238 # if we don't want to process the raw text and the raw
2166 # text is cached, we can exit early.
2239 # text is cached, we can exit early.
2167 return rawtext
2240 return rawtext
2168 if rev is None:
2241 if rev is None:
2169 rev = self.rev(node)
2242 rev = self.rev(node)
2170 # the revlog's flags for this revision
2243 # the revlog's flags for this revision
2171 # (they usually alter its state or content)
2244 # (they usually alter its state or content)
2172 flags = self.flags(rev)
2245 flags = self.flags(rev)
2173
2246
2174 if validated and flags == REVIDX_DEFAULT_FLAGS:
2247 if validated and flags == REVIDX_DEFAULT_FLAGS:
2175 # no extra flags set, no flag processor runs, text = rawtext
2248 # no extra flags set, no flag processor runs, text = rawtext
2176 return rawtext
2249 return rawtext
2177
2250
2178 if raw:
2251 if raw:
2179 validatehash = flagutil.processflagsraw(self, rawtext, flags)
2252 validatehash = flagutil.processflagsraw(self, rawtext, flags)
2180 text = rawtext
2253 text = rawtext
2181 else:
2254 else:
2182 r = flagutil.processflagsread(self, rawtext, flags)
2255 r = flagutil.processflagsread(self, rawtext, flags)
2183 text, validatehash = r
2256 text, validatehash = r
2184 if validatehash:
2257 if validatehash:
2185 self.checkhash(text, node, rev=rev)
2258 self.checkhash(text, node, rev=rev)
2186 if not validated:
2259 if not validated:
2187 self._revisioncache = (node, rev, rawtext)
2260 self._revisioncache = (node, rev, rawtext)
2188
2261
2189 return text
2262 return text
2190
2263
2191 def _rawtext(self, node, rev):
2264 def _rawtext(self, node, rev):
2192 """return the possibly unvalidated rawtext for a revision
2265 """return the possibly unvalidated rawtext for a revision
2193
2266
2194 returns (rev, rawtext, validated)
2267 returns (rev, rawtext, validated)
2195 """
2268 """
2196
2269
2197 # revision in the cache (could be useful to apply delta)
2270 # revision in the cache (could be useful to apply delta)
2198 cachedrev = None
2271 cachedrev = None
2199 # An intermediate text to apply deltas to
2272 # An intermediate text to apply deltas to
2200 basetext = None
2273 basetext = None
2201
2274
2202 # Check if we have the entry in cache
2275 # Check if we have the entry in cache
2203 # The cache entry looks like (node, rev, rawtext)
2276 # The cache entry looks like (node, rev, rawtext)
2204 if self._revisioncache:
2277 if self._revisioncache:
2205 if self._revisioncache[0] == node:
2278 if self._revisioncache[0] == node:
2206 return (rev, self._revisioncache[2], True)
2279 return (rev, self._revisioncache[2], True)
2207 cachedrev = self._revisioncache[1]
2280 cachedrev = self._revisioncache[1]
2208
2281
2209 if rev is None:
2282 if rev is None:
2210 rev = self.rev(node)
2283 rev = self.rev(node)
2211
2284
2212 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
2285 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
2213 if stopped:
2286 if stopped:
2214 basetext = self._revisioncache[2]
2287 basetext = self._revisioncache[2]
2215
2288
2216 # drop cache to save memory; the caller is expected to
2289 # drop cache to save memory; the caller is expected to
2217 # update self._revisioncache after validating the text
2290 # update self._revisioncache after validating the text
2218 self._revisioncache = None
2291 self._revisioncache = None
2219
2292
2220 targetsize = None
2293 targetsize = None
2221 rawsize = self.index[rev][2]
2294 rawsize = self.index[rev][2]
2222 if 0 <= rawsize:
2295 if 0 <= rawsize:
2223 targetsize = 4 * rawsize
2296 targetsize = 4 * rawsize
2224
2297
2225 bins = self._chunks(chain, targetsize=targetsize)
2298 bins = self._chunks(chain, targetsize=targetsize)
2226 if basetext is None:
2299 if basetext is None:
2227 basetext = bytes(bins[0])
2300 basetext = bytes(bins[0])
2228 bins = bins[1:]
2301 bins = bins[1:]
2229
2302
2230 rawtext = mdiff.patches(basetext, bins)
2303 rawtext = mdiff.patches(basetext, bins)
2231 del basetext # let us have a chance to free memory early
2304 del basetext # let us have a chance to free memory early
2232 return (rev, rawtext, False)
2305 return (rev, rawtext, False)
2233
2306
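# Editorial sketch, not part of the diff: the reconstruction performed by
# _rawtext() above, reduced to its core. The base text is the first chunk
# of the delta chain (or a cached intermediate text) and every following
# chunk is a binary delta folded in by mdiff.patches():
#
#     chain, stopped = rl._deltachain(rev)
#     bins = rl._chunks(chain)
#     rawtext = mdiff.patches(bytes(bins[0]), bins[1:])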
2234 def _sidedata(self, rev):
2307 def _sidedata(self, rev):
2235 """Return the sidedata for a given revision number."""
2308 """Return the sidedata for a given revision number."""
2236 index_entry = self.index[rev]
2309 index_entry = self.index[rev]
2237 sidedata_offset = index_entry[8]
2310 sidedata_offset = index_entry[8]
2238 sidedata_size = index_entry[9]
2311 sidedata_size = index_entry[9]
2239
2312
2240 if self._inline:
2313 if self._inline:
2241 sidedata_offset += self.index.entry_size * (1 + rev)
2314 sidedata_offset += self.index.entry_size * (1 + rev)
2242 if sidedata_size == 0:
2315 if sidedata_size == 0:
2243 return {}
2316 return {}
2244
2317
2245 if self._docket.sidedata_end < sidedata_offset + sidedata_size:
2318 if self._docket.sidedata_end < sidedata_offset + sidedata_size:
2246 filename = self._sidedatafile
2319 filename = self._sidedatafile
2247 end = self._docket.sidedata_end
2320 end = self._docket.sidedata_end
2248 offset = sidedata_offset
2321 offset = sidedata_offset
2249 length = sidedata_size
2322 length = sidedata_size
2250 m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
2323 m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
2251 raise error.RevlogError(m)
2324 raise error.RevlogError(m)
2252
2325
2253 comp_segment = self._segmentfile_sidedata.read_chunk(
2326 comp_segment = self._segmentfile_sidedata.read_chunk(
2254 sidedata_offset, sidedata_size
2327 sidedata_offset, sidedata_size
2255 )
2328 )
2256
2329
2257 comp = self.index[rev][11]
2330 comp = self.index[rev][11]
2258 if comp == COMP_MODE_PLAIN:
2331 if comp == COMP_MODE_PLAIN:
2259 segment = comp_segment
2332 segment = comp_segment
2260 elif comp == COMP_MODE_DEFAULT:
2333 elif comp == COMP_MODE_DEFAULT:
2261 segment = self._decompressor(comp_segment)
2334 segment = self._decompressor(comp_segment)
2262 elif comp == COMP_MODE_INLINE:
2335 elif comp == COMP_MODE_INLINE:
2263 segment = self.decompress(comp_segment)
2336 segment = self.decompress(comp_segment)
2264 else:
2337 else:
2265 msg = b'unknown compression mode %d'
2338 msg = b'unknown compression mode %d'
2266 msg %= comp
2339 msg %= comp
2267 raise error.RevlogError(msg)
2340 raise error.RevlogError(msg)
2268
2341
2269 sidedata = sidedatautil.deserialize_sidedata(segment)
2342 sidedata = sidedatautil.deserialize_sidedata(segment)
2270 return sidedata
2343 return sidedata
2271
2344
2272 def rawdata(self, nodeorrev):
2345 def rawdata(self, nodeorrev):
2273 """return the uncompressed raw data of a given node or revision number."""
2346 """return the uncompressed raw data of a given node or revision number."""
2274 return self._revisiondata(nodeorrev, raw=True)
2347 return self._revisiondata(nodeorrev, raw=True)
2275
2348
2276 def hash(self, text, p1, p2):
2349 def hash(self, text, p1, p2):
2277 """Compute a node hash.
2350 """Compute a node hash.
2278
2351
2279 Available as a function so that subclasses can replace the hash
2352 Available as a function so that subclasses can replace the hash
2280 as needed.
2353 as needed.
2281 """
2354 """
2282 return storageutil.hashrevisionsha1(text, p1, p2)
2355 return storageutil.hashrevisionsha1(text, p1, p2)
2283
2356
2284 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2357 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2285 """Check node hash integrity.
2358 """Check node hash integrity.
2286
2359
2287 Available as a function so that subclasses can extend hash mismatch
2360 Available as a function so that subclasses can extend hash mismatch
2288 behaviors as needed.
2361 behaviors as needed.
2289 """
2362 """
2290 try:
2363 try:
2291 if p1 is None and p2 is None:
2364 if p1 is None and p2 is None:
2292 p1, p2 = self.parents(node)
2365 p1, p2 = self.parents(node)
2293 if node != self.hash(text, p1, p2):
2366 if node != self.hash(text, p1, p2):
2294 # Clear the revision cache on hash failure. The revision cache
2367 # Clear the revision cache on hash failure. The revision cache
2295 # only stores the raw revision and clearing the cache does have
2368 # only stores the raw revision and clearing the cache does have
2296 # the side-effect that we won't have a cache hit when the raw
2369 # the side-effect that we won't have a cache hit when the raw
2297 # revision data is accessed. But this case should be rare and
2370 # revision data is accessed. But this case should be rare and
2298 # it is extra work to teach the cache about the hash
2371 # it is extra work to teach the cache about the hash
2299 # verification state.
2372 # verification state.
2300 if self._revisioncache and self._revisioncache[0] == node:
2373 if self._revisioncache and self._revisioncache[0] == node:
2301 self._revisioncache = None
2374 self._revisioncache = None
2302
2375
2303 revornode = rev
2376 revornode = rev
2304 if revornode is None:
2377 if revornode is None:
2305 revornode = templatefilters.short(hex(node))
2378 revornode = templatefilters.short(hex(node))
2306 raise error.RevlogError(
2379 raise error.RevlogError(
2307 _(b"integrity check failed on %s:%s")
2380 _(b"integrity check failed on %s:%s")
2308 % (self.display_id, pycompat.bytestr(revornode))
2381 % (self.display_id, pycompat.bytestr(revornode))
2309 )
2382 )
2310 except error.RevlogError:
2383 except error.RevlogError:
2311 if self.feature_config.censorable and storageutil.iscensoredtext(
2384 if self.feature_config.censorable and storageutil.iscensoredtext(
2312 text
2385 text
2313 ):
2386 ):
2314 raise error.CensoredNodeError(self.display_id, node, text)
2387 raise error.CensoredNodeError(self.display_id, node, text)
2315 raise
2388 raise
2316
2389
2317 @property
2390 @property
2318 def _split_index_file(self):
2391 def _split_index_file(self):
2319 """the path at which to expect the index of an ongoing splitting operation
2392 """the path at which to expect the index of an ongoing splitting operation
2320
2393
2321 The file will only exist if a splitting operation is in progress, but
2394 The file will only exist if a splitting operation is in progress, but
2322 it is always expected at the same location."""
2395 it is always expected at the same location."""
2323 parts = self.radix.split(b'/')
2396 parts = self.radix.split(b'/')
2324 if len(parts) > 1:
2397 if len(parts) > 1:
2325 # adds a '-s' suffix to the `data/` or `meta/` base directory
2398 # adds a '-s' suffix to the `data/` or `meta/` base directory
2326 head = parts[0] + b'-s'
2399 head = parts[0] + b'-s'
2327 mids = parts[1:-1]
2400 mids = parts[1:-1]
2328 tail = parts[-1] + b'.i'
2401 tail = parts[-1] + b'.i'
2329 pieces = [head] + mids + [tail]
2402 pieces = [head] + mids + [tail]
2330 return b'/'.join(pieces)
2403 return b'/'.join(pieces)
2331 else:
2404 else:
2332 # the revlog is stored at the root of the store (changelog or
2405 # the revlog is stored at the root of the store (changelog or
2333 # manifest), no risk of collision.
2406 # manifest), no risk of collision.
2334 return self.radix + b'.i.s'
2407 return self.radix + b'.i.s'
2335
2408
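# Editorial worked example, not part of the diff, for the property above
# (paths hypothetical):
#
#     radix b'data/foo'     -> split index at b'data-s/foo.i'
#     radix b'meta/bar/baz' -> split index at b'meta-s/bar/baz.i'
#     radix b'00changelog'  -> split index at b'00changelog.i.s'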
2336 def _enforceinlinesize(self, tr, side_write=True):
2409 def _enforceinlinesize(self, tr, side_write=True):
2337 """Check if the revlog is too big for inline and convert if so.
2410 """Check if the revlog is too big for inline and convert if so.
2338
2411
2339 This should be called after revisions are added to the revlog. If the
2412 This should be called after revisions are added to the revlog. If the
2340 revlog has grown too large to be an inline revlog, it will convert it
2413 revlog has grown too large to be an inline revlog, it will convert it
2341 to use multiple index and data files.
2414 to use multiple index and data files.
2342 """
2415 """
2343 tiprev = len(self) - 1
2416 tiprev = len(self) - 1
2344 total_size = self.start(tiprev) + self.length(tiprev)
2417 total_size = self.start(tiprev) + self.length(tiprev)
2345 if not self._inline or total_size < _maxinline:
2418 if not self._inline or total_size < _maxinline:
2346 return
2419 return
2347
2420
2348 troffset = tr.findoffset(self._indexfile)
2421 troffset = tr.findoffset(self._indexfile)
2349 if troffset is None:
2422 if troffset is None:
2350 raise error.RevlogError(
2423 raise error.RevlogError(
2351 _(b"%s not found in the transaction") % self._indexfile
2424 _(b"%s not found in the transaction") % self._indexfile
2352 )
2425 )
2353 if troffset:
2426 if troffset:
2354 tr.addbackup(self._indexfile, for_offset=True)
2427 tr.addbackup(self._indexfile, for_offset=True)
2355 tr.add(self._datafile, 0)
2428 tr.add(self._datafile, 0)
2356
2429
2357 existing_handles = False
2430 existing_handles = False
2358 if self._writinghandles is not None:
2431 if self._writinghandles is not None:
2359 existing_handles = True
2432 existing_handles = True
2360 fp = self._writinghandles[0]
2433 fp = self._writinghandles[0]
2361 fp.flush()
2434 fp.flush()
2362 fp.close()
2435 fp.close()
2363 # We can't use the cached file handle after close(). So prevent
2436 # We can't use the cached file handle after close(). So prevent
2364 # its usage.
2437 # its usage.
2365 self._writinghandles = None
2438 self._writinghandles = None
2366 self._segmentfile.writing_handle = None
2439 self._segmentfile.writing_handle = None
2367 # No need to deal with the sidedata writing handle as it is only
2440 # No need to deal with the sidedata writing handle as it is only
2368 # relevant for revlog-v2, which is never inline and thus never
2441 # relevant for revlog-v2, which is never inline and thus never
2369 # reaches this code
2442 # reaches this code
2370 if side_write:
2443 if side_write:
2371 old_index_file_path = self._indexfile
2444 old_index_file_path = self._indexfile
2372 new_index_file_path = self._split_index_file
2445 new_index_file_path = self._split_index_file
2373 opener = self.opener
2446 opener = self.opener
2374 weak_self = weakref.ref(self)
2447 weak_self = weakref.ref(self)
2375
2448
2376 # the "split" index replaces the real index when the transaction is finalized
2449 # the "split" index replaces the real index when the transaction is finalized
2377 def finalize_callback(tr):
2450 def finalize_callback(tr):
2378 opener.rename(
2451 opener.rename(
2379 new_index_file_path,
2452 new_index_file_path,
2380 old_index_file_path,
2453 old_index_file_path,
2381 checkambig=True,
2454 checkambig=True,
2382 )
2455 )
2383 maybe_self = weak_self()
2456 maybe_self = weak_self()
2384 if maybe_self is not None:
2457 if maybe_self is not None:
2385 maybe_self._indexfile = old_index_file_path
2458 maybe_self._indexfile = old_index_file_path
2386
2459
2387 def abort_callback(tr):
2460 def abort_callback(tr):
2388 maybe_self = weak_self()
2461 maybe_self = weak_self()
2389 if maybe_self is not None:
2462 if maybe_self is not None:
2390 maybe_self._indexfile = old_index_file_path
2463 maybe_self._indexfile = old_index_file_path
2391
2464
2392 tr.registertmp(new_index_file_path)
2465 tr.registertmp(new_index_file_path)
2393 if self.target[1] is not None:
2466 if self.target[1] is not None:
2394 callback_id = b'000-revlog-split-%d-%s' % self.target
2467 callback_id = b'000-revlog-split-%d-%s' % self.target
2395 else:
2468 else:
2396 callback_id = b'000-revlog-split-%d' % self.target[0]
2469 callback_id = b'000-revlog-split-%d' % self.target[0]
2397 tr.addfinalize(callback_id, finalize_callback)
2470 tr.addfinalize(callback_id, finalize_callback)
2398 tr.addabort(callback_id, abort_callback)
2471 tr.addabort(callback_id, abort_callback)
2399
2472
2400 new_dfh = self._datafp(b'w+')
2473 new_dfh = self._datafp(b'w+')
2401 new_dfh.truncate(0) # drop any potentially existing data
2474 new_dfh.truncate(0) # drop any potentially existing data
2402 try:
2475 try:
2403 with self.reading():
2476 with self.reading():
2404 for r in self:
2477 for r in self:
2405 new_dfh.write(self._getsegmentforrevs(r, r)[1])
2478 new_dfh.write(self._getsegmentforrevs(r, r)[1])
2406 new_dfh.flush()
2479 new_dfh.flush()
2407
2480
2408 if side_write:
2481 if side_write:
2409 self._indexfile = new_index_file_path
2482 self._indexfile = new_index_file_path
2410 with self.__index_new_fp() as fp:
2483 with self.__index_new_fp() as fp:
2411 self._format_flags &= ~FLAG_INLINE_DATA
2484 self._format_flags &= ~FLAG_INLINE_DATA
2412 self._inline = False
2485 self._inline = False
2413 for i in self:
2486 for i in self:
2414 e = self.index.entry_binary(i)
2487 e = self.index.entry_binary(i)
2415 if i == 0 and self._docket is None:
2488 if i == 0 and self._docket is None:
2416 header = self._format_flags | self._format_version
2489 header = self._format_flags | self._format_version
2417 header = self.index.pack_header(header)
2490 header = self.index.pack_header(header)
2418 e = header + e
2491 e = header + e
2419 fp.write(e)
2492 fp.write(e)
2420 if self._docket is not None:
2493 if self._docket is not None:
2421 self._docket.index_end = fp.tell()
2494 self._docket.index_end = fp.tell()
2422
2495
2423 # If we don't use side-write, the temp file replaces the real
2496 # If we don't use side-write, the temp file replaces the real
2424 # index when we exit the context manager
2497 # index when we exit the context manager
2425
2498
2426 nodemaputil.setup_persistent_nodemap(tr, self)
2499 nodemaputil.setup_persistent_nodemap(tr, self)
2427 self._segmentfile = randomaccessfile.randomaccessfile(
2500 self._segmentfile = randomaccessfile.randomaccessfile(
2428 self.opener,
2501 self.opener,
2429 self._datafile,
2502 self._datafile,
2430 self.data_config.chunk_cache_size,
2503 self.data_config.chunk_cache_size,
2431 )
2504 )
2432
2505
2433 if existing_handles:
2506 if existing_handles:
2434 # switched from inline to conventional; reopen the index
2507 # switched from inline to conventional; reopen the index
2435 ifh = self.__index_write_fp()
2508 ifh = self.__index_write_fp()
2436 self._writinghandles = (ifh, new_dfh, None)
2509 self._writinghandles = (ifh, new_dfh, None)
2437 self._segmentfile.writing_handle = new_dfh
2510 self._segmentfile.writing_handle = new_dfh
2438 new_dfh = None
2511 new_dfh = None
2439 # No need to deal with the sidedata writing handle as it is only
2512 # No need to deal with the sidedata writing handle as it is only
2440 # relevant for revlog-v2, which is never inline and thus never
2513 # relevant for revlog-v2, which is never inline and thus never
2441 # reaches this code
2514 # reaches this code
2442 finally:
2515 finally:
2443 if new_dfh is not None:
2516 if new_dfh is not None:
2444 new_dfh.close()
2517 new_dfh.close()
2445
2518
2446 def _nodeduplicatecallback(self, transaction, node):
2519 def _nodeduplicatecallback(self, transaction, node):
2447 """called when trying to add a node already stored."""
2520 """called when trying to add a node already stored."""
2448
2521
2449 @contextlib.contextmanager
2522 @contextlib.contextmanager
2450 def reading(self):
2523 def reading(self):
2451 """Context manager that keeps data and sidedata files open for reading"""
2524 """Context manager that keeps data and sidedata files open for reading"""
2452 if len(self.index) == 0:
2525 if len(self.index) == 0:
2453 yield # nothing to be read
2526 yield # nothing to be read
2454 else:
2527 else:
2455 with self._segmentfile.reading():
2528 with self._segmentfile.reading():
2456 with self._segmentfile_sidedata.reading():
2529 with self._segmentfile_sidedata.reading():
2457 yield
2530 yield
2458
2531
2459 @contextlib.contextmanager
2532 @contextlib.contextmanager
2460 def _writing(self, transaction):
2533 def _writing(self, transaction):
2461 if self._trypending:
2534 if self._trypending:
2462 msg = b'try to write in a `trypending` revlog: %s'
2535 msg = b'try to write in a `trypending` revlog: %s'
2463 msg %= self.display_id
2536 msg %= self.display_id
2464 raise error.ProgrammingError(msg)
2537 raise error.ProgrammingError(msg)
2465 if self._writinghandles is not None:
2538 if self._writinghandles is not None:
2466 yield
2539 yield
2467 else:
2540 else:
2468 ifh = dfh = sdfh = None
2541 ifh = dfh = sdfh = None
2469 try:
2542 try:
2470 r = len(self)
2543 r = len(self)
2471 # opening the data file.
2544 # opening the data file.
2472 dsize = 0
2545 dsize = 0
2473 if r:
2546 if r:
2474 dsize = self.end(r - 1)
2547 dsize = self.end(r - 1)
2475 dfh = None
2548 dfh = None
2476 if not self._inline:
2549 if not self._inline:
2477 try:
2550 try:
2478 dfh = self._datafp(b"r+")
2551 dfh = self._datafp(b"r+")
2479 if self._docket is None:
2552 if self._docket is None:
2480 dfh.seek(0, os.SEEK_END)
2553 dfh.seek(0, os.SEEK_END)
2481 else:
2554 else:
2482 dfh.seek(self._docket.data_end, os.SEEK_SET)
2555 dfh.seek(self._docket.data_end, os.SEEK_SET)
2483 except FileNotFoundError:
2556 except FileNotFoundError:
2484 dfh = self._datafp(b"w+")
2557 dfh = self._datafp(b"w+")
2485 transaction.add(self._datafile, dsize)
2558 transaction.add(self._datafile, dsize)
2486 if self._sidedatafile is not None:
2559 if self._sidedatafile is not None:
2487 # revlog-v2 does not inline, help Pytype
2560 # revlog-v2 does not inline, help Pytype
2488 assert dfh is not None
2561 assert dfh is not None
2489 try:
2562 try:
2490 sdfh = self.opener(self._sidedatafile, mode=b"r+")
2563 sdfh = self.opener(self._sidedatafile, mode=b"r+")
2491 sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)
2564 sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)
2492 except FileNotFoundError:
2565 except FileNotFoundError:
2493 sdfh = self.opener(self._sidedatafile, mode=b"w+")
2566 sdfh = self.opener(self._sidedatafile, mode=b"w+")
2494 transaction.add(
2567 transaction.add(
2495 self._sidedatafile, self._docket.sidedata_end
2568 self._sidedatafile, self._docket.sidedata_end
2496 )
2569 )
2497
2570
2498 # opening the index file.
2571 # opening the index file.
2499 isize = r * self.index.entry_size
2572 isize = r * self.index.entry_size
2500 ifh = self.__index_write_fp()
2573 ifh = self.__index_write_fp()
2501 if self._inline:
2574 if self._inline:
2502 transaction.add(self._indexfile, dsize + isize)
2575 transaction.add(self._indexfile, dsize + isize)
2503 else:
2576 else:
2504 transaction.add(self._indexfile, isize)
2577 transaction.add(self._indexfile, isize)
2505 # exposing all file handles for writing.
2578 # exposing all file handles for writing.
2506 self._writinghandles = (ifh, dfh, sdfh)
2579 self._writinghandles = (ifh, dfh, sdfh)
2507 self._segmentfile.writing_handle = ifh if self._inline else dfh
2580 self._segmentfile.writing_handle = ifh if self._inline else dfh
2508 self._segmentfile_sidedata.writing_handle = sdfh
2581 self._segmentfile_sidedata.writing_handle = sdfh
2509 yield
2582 yield
2510 if self._docket is not None:
2583 if self._docket is not None:
2511 self._write_docket(transaction)
2584 self._write_docket(transaction)
2512 finally:
2585 finally:
2513 self._writinghandles = None
2586 self._writinghandles = None
2514 self._segmentfile.writing_handle = None
2587 self._segmentfile.writing_handle = None
2515 self._segmentfile_sidedata.writing_handle = None
2588 self._segmentfile_sidedata.writing_handle = None
2516 if dfh is not None:
2589 if dfh is not None:
2517 dfh.close()
2590 dfh.close()
2518 if sdfh is not None:
2591 if sdfh is not None:
2519 sdfh.close()
2592 sdfh.close()
2520 # closing the index file last to avoid exposing references to
2593 # closing the index file last to avoid exposing references to
2521 # potentially unflushed data content.
2594 # potentially unflushed data content.
2522 if ifh is not None:
2595 if ifh is not None:
2523 ifh.close()
2596 ifh.close()
2524
2597
2525 def _write_docket(self, transaction):
2598 def _write_docket(self, transaction):
2526 """write the current docket on disk
2599 """write the current docket on disk
2527
2600
2528 Exists as a method to help the changelog implement its transaction logic
2601 Exists as a method to help the changelog implement its transaction logic
2529
2602
2530 We could also imagine using the same transaction logic for all revlogs
2603 We could also imagine using the same transaction logic for all revlogs
2531 since dockets are cheap."""
2604 since dockets are cheap."""
2532 self._docket.write(transaction)
2605 self._docket.write(transaction)
2533
2606
2534 def addrevision(
2607 def addrevision(
2535 self,
2608 self,
2536 text,
2609 text,
2537 transaction,
2610 transaction,
2538 link,
2611 link,
2539 p1,
2612 p1,
2540 p2,
2613 p2,
2541 cachedelta=None,
2614 cachedelta=None,
2542 node=None,
2615 node=None,
2543 flags=REVIDX_DEFAULT_FLAGS,
2616 flags=REVIDX_DEFAULT_FLAGS,
2544 deltacomputer=None,
2617 deltacomputer=None,
2545 sidedata=None,
2618 sidedata=None,
2546 ):
2619 ):
2547 """add a revision to the log
2620 """add a revision to the log
2548
2621
2549 text - the revision data to add
2622 text - the revision data to add
2550 transaction - the transaction object used for rollback
2623 transaction - the transaction object used for rollback
2551 link - the linkrev data to add
2624 link - the linkrev data to add
2552 p1, p2 - the parent nodeids of the revision
2625 p1, p2 - the parent nodeids of the revision
2553 cachedelta - an optional precomputed delta
2626 cachedelta - an optional precomputed delta
2554 node - nodeid of revision; typically node is not specified, and it is
2627 node - nodeid of revision; typically node is not specified, and it is
2555 computed by default as hash(text, p1, p2); however, subclasses might
2628 computed by default as hash(text, p1, p2); however, subclasses might
2556 use a different hashing method (and override checkhash() in that case)
2629 use a different hashing method (and override checkhash() in that case)
2557 flags - the known flags to set on the revision
2630 flags - the known flags to set on the revision
2558 deltacomputer - an optional deltacomputer instance shared between
2631 deltacomputer - an optional deltacomputer instance shared between
2559 multiple calls
2632 multiple calls
2560 """
2633 """
2561 if link == nullrev:
2634 if link == nullrev:
2562 raise error.RevlogError(
2635 raise error.RevlogError(
2563 _(b"attempted to add linkrev -1 to %s") % self.display_id
2636 _(b"attempted to add linkrev -1 to %s") % self.display_id
2564 )
2637 )
2565
2638
2566 if sidedata is None:
2639 if sidedata is None:
2567 sidedata = {}
2640 sidedata = {}
2568 elif sidedata and not self.feature_config.has_side_data:
2641 elif sidedata and not self.feature_config.has_side_data:
2569 raise error.ProgrammingError(
2642 raise error.ProgrammingError(
2570 _(b"trying to add sidedata to a revlog that does not support them")
2643 _(b"trying to add sidedata to a revlog that does not support them")
2571 )
2644 )
2572
2645
2573 if flags:
2646 if flags:
2574 node = node or self.hash(text, p1, p2)
2647 node = node or self.hash(text, p1, p2)
2575
2648
2576 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2649 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2577
2650
2578 # If the flag processor modifies the revision data, ignore any provided
2651 # If the flag processor modifies the revision data, ignore any provided
2579 # cachedelta.
2652 # cachedelta.
2580 if rawtext != text:
2653 if rawtext != text:
2581 cachedelta = None
2654 cachedelta = None
2582
2655
2583 if len(rawtext) > _maxentrysize:
2656 if len(rawtext) > _maxentrysize:
2584 raise error.RevlogError(
2657 raise error.RevlogError(
2585 _(
2658 _(
2586 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2659 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2587 )
2660 )
2588 % (self.display_id, len(rawtext))
2661 % (self.display_id, len(rawtext))
2589 )
2662 )
2590
2663
2591 node = node or self.hash(rawtext, p1, p2)
2664 node = node or self.hash(rawtext, p1, p2)
2592 rev = self.index.get_rev(node)
2665 rev = self.index.get_rev(node)
2593 if rev is not None:
2666 if rev is not None:
2594 return rev
2667 return rev
2595
2668
2596 if validatehash:
2669 if validatehash:
2597 self.checkhash(rawtext, node, p1=p1, p2=p2)
2670 self.checkhash(rawtext, node, p1=p1, p2=p2)
2598
2671
2599 return self.addrawrevision(
2672 return self.addrawrevision(
2600 rawtext,
2673 rawtext,
2601 transaction,
2674 transaction,
2602 link,
2675 link,
2603 p1,
2676 p1,
2604 p2,
2677 p2,
2605 node,
2678 node,
2606 flags,
2679 flags,
2607 cachedelta=cachedelta,
2680 cachedelta=cachedelta,
2608 deltacomputer=deltacomputer,
2681 deltacomputer=deltacomputer,
2609 sidedata=sidedata,
2682 sidedata=sidedata,
2610 )
2683 )
2611
2684
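# Editorial sketch, not part of the diff: adding a revision with the API
# above (all names hypothetical). The node is derived from
# (text, p1, p2) unless supplied, and the write must happen inside a
# transaction:
#
#     with repo.transaction(b'example') as tr:
#         rev = rl.addrevision(text, tr, linkrev, p1node, p2node)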
2612 def addrawrevision(
2685 def addrawrevision(
2613 self,
2686 self,
2614 rawtext,
2687 rawtext,
2615 transaction,
2688 transaction,
2616 link,
2689 link,
2617 p1,
2690 p1,
2618 p2,
2691 p2,
2619 node,
2692 node,
2620 flags,
2693 flags,
2621 cachedelta=None,
2694 cachedelta=None,
2622 deltacomputer=None,
2695 deltacomputer=None,
2623 sidedata=None,
2696 sidedata=None,
2624 ):
2697 ):
2625 """add a raw revision with known flags, node and parents
2698 """add a raw revision with known flags, node and parents
2626 useful when reusing a revision not stored in this revlog (e.g. received
2699 useful when reusing a revision not stored in this revlog (e.g. received
2627 over the wire, or read from an external bundle).
2700 over the wire, or read from an external bundle).
2628 """
2701 """
2629 with self._writing(transaction):
2702 with self._writing(transaction):
2630 return self._addrevision(
2703 return self._addrevision(
2631 node,
2704 node,
2632 rawtext,
2705 rawtext,
2633 transaction,
2706 transaction,
2634 link,
2707 link,
2635 p1,
2708 p1,
2636 p2,
2709 p2,
2637 flags,
2710 flags,
2638 cachedelta,
2711 cachedelta,
2639 deltacomputer=deltacomputer,
2712 deltacomputer=deltacomputer,
2640 sidedata=sidedata,
2713 sidedata=sidedata,
2641 )
2714 )
2642
2715
2643 def compress(self, data):
2716 def compress(self, data):
2644 """Generate a possibly-compressed representation of data."""
2717 """Generate a possibly-compressed representation of data."""
2645 if not data:
2718 if not data:
2646 return b'', data
2719 return b'', data
2647
2720
2648 compressed = self._compressor.compress(data)
2721 compressed = self._compressor.compress(data)
2649
2722
2650 if compressed:
2723 if compressed:
2651 # The revlog compressor added the header in the returned data.
2724 # The revlog compressor added the header in the returned data.
2652 return b'', compressed
2725 return b'', compressed
2653
2726
2654 if data[0:1] == b'\0':
2727 if data[0:1] == b'\0':
2655 return b'', data
2728 return b'', data
2656 return b'u', data
2729 return b'u', data
2657
2730
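# Editorial summary, not part of the diff: the (header, data) contract
# shared by compress() above and decompress() below:
#
#     (b'',  compressed)  # engine header embedded in payload (e.g. b'x')
#     (b'',  data)        # raw data already starting with b'\0'
#     (b'u', data)        # explicit "uncompressed" marker for other raw data
#
# decompress() dispatches on the first byte accordingly: b'x' -> zlib,
# b'\0' -> returned as-is, b'u' -> strip the marker, and anything else is
# routed to a registered compression engine.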
2658 def decompress(self, data):
2731 def decompress(self, data):
2659 """Decompress a revlog chunk.
2732 """Decompress a revlog chunk.
2660
2733
2661 The chunk is expected to begin with a header identifying the
2734 The chunk is expected to begin with a header identifying the
2662 format type so it can be routed to an appropriate decompressor.
2735 format type so it can be routed to an appropriate decompressor.
2663 """
2736 """
2664 if not data:
2737 if not data:
2665 return data
2738 return data
2666
2739
2667 # Revlogs are read much more frequently than they are written and many
2740 # Revlogs are read much more frequently than they are written and many
2668 # chunks only take microseconds to decompress, so performance is
2741 # chunks only take microseconds to decompress, so performance is
2669 # important here.
2742 # important here.
2670 #
2743 #
2671 # We can make a few assumptions about revlogs:
2744 # We can make a few assumptions about revlogs:
2672 #
2745 #
2673 # 1) the majority of chunks will be compressed (as opposed to inline
2746 # 1) the majority of chunks will be compressed (as opposed to inline
2674 # raw data).
2747 # raw data).
2675 # 2) decompressing *any* data will likely be at least 10x slower than
2748 # 2) decompressing *any* data will likely be at least 10x slower than
2676 # returning raw inline data.
2749 # returning raw inline data.
2677 # 3) we want to prioritize common and officially supported compression
2750 # 3) we want to prioritize common and officially supported compression
2678 # engines
2751 # engines
2679 #
2752 #
2680 # It follows that we want to optimize for "decompress compressed data
2753 # It follows that we want to optimize for "decompress compressed data
2681 # when encoded with common and officially supported compression engines"
2754 # when encoded with common and officially supported compression engines"
2682 # case over "raw data" and "data encoded by less common or non-official
2755 # case over "raw data" and "data encoded by less common or non-official
2683 # compression engines." That is why we have the inline lookup first
2756 # compression engines." That is why we have the inline lookup first
2684 # followed by the compengines lookup.
2757 # followed by the compengines lookup.
2685 #
2758 #
2686 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2759 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2687 # compressed chunks. And this matters for changelog and manifest reads.
2760 # compressed chunks. And this matters for changelog and manifest reads.
2688 t = data[0:1]
2761 t = data[0:1]
2689
2762
2690 if t == b'x':
2763 if t == b'x':
2691 try:
2764 try:
2692 return _zlibdecompress(data)
2765 return _zlibdecompress(data)
2693 except zlib.error as e:
2766 except zlib.error as e:
2694 raise error.RevlogError(
2767 raise error.RevlogError(
2695 _(b'revlog decompress error: %s')
2768 _(b'revlog decompress error: %s')
2696 % stringutil.forcebytestr(e)
2769 % stringutil.forcebytestr(e)
2697 )
2770 )
2698 # '\0' is more common than 'u' so it goes first.
2771 # '\0' is more common than 'u' so it goes first.
2699 elif t == b'\0':
2772 elif t == b'\0':
2700 return data
2773 return data
2701 elif t == b'u':
2774 elif t == b'u':
2702 return util.buffer(data, 1)
2775 return util.buffer(data, 1)
2703
2776
2704 compressor = self._get_decompressor(t)
2777 compressor = self._get_decompressor(t)
2705
2778
2706 return compressor.decompress(data)
2779 return compressor.decompress(data)
2707
2780
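    # A minimal sketch (not part of the revlog API) of the one-byte header
    # convention shared by compress() and decompress() above: zlib output is
    # self-identifying (it starts with b'x'), data beginning with b'\0' is
    # stored as-is, and anything kept uncompressed gets an explicit b'u'
    # prefix. The function name is invented for this example.
    def _example_chunk_roundtrip(data):
        """Compress ``data`` with plain zlib under the revlog header rules,
        then route it back through the same first-byte dispatch."""
        compressed = zlib.compress(data)
        if data and len(compressed) < len(data):
            chunk = compressed  # begins with b'x', needs no extra marker
        elif data[:1] == b'\0' or not data:
            chunk = data  # empty or NUL-prefixed data is self-marking
        else:
            chunk = b'u' + data  # explicit "stored uncompressed" marker
        # Reading side: dispatch on the first byte, as decompress() does.
        t = chunk[:1]
        if t == b'x':
            return zlib.decompress(chunk)
        if t == b'u':
            return chunk[1:]
        return chunk
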
    def _addrevision(
        self,
        node,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        flags,
        cachedelta,
        alwayscache=False,
        deltacomputer=None,
        sidedata=None,
    ):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be
          set. if both are set, they must correspond to each other.
        """
        if node == self.nullid:
            raise error.RevlogError(
                _(b"%s: attempt to add null revision") % self.display_id
            )
        if (
            node == self.nodeconstants.wdirid
            or node in self.nodeconstants.wdirfilenodeids
        ):
            raise error.RevlogError(
                _(b"%s: attempt to add wdir revision") % self.display_id
            )
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1

        offset = self._get_data_offset(prev)

        if self._concurrencychecker:
            ifh, dfh, sdfh = self._writinghandles
            # XXX no checking for the sidedata file
            if self._inline:
                # offset is "as if" it were in the .d file, so we need to add
                # on the size of the entry metadata.
                self._concurrencychecker(
                    ifh, self._indexfile, offset + curr * self.index.entry_size
                )
            else:
                # Entries in the .i are a consistent size.
                self._concurrencychecker(
                    ifh, self._indexfile, curr * self.index.entry_size
                )
                self._concurrencychecker(dfh, self._datafile, offset)

        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(
                revlog.size(self, cachedelta[0]), cachedelta[1]
            )
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            write_debug = None
            if self.delta_config.debug_delta:
                write_debug = transaction._report
            deltacomputer = deltautil.deltacomputer(
                self, write_debug=write_debug
            )

        if cachedelta is not None and len(cachedelta) == 2:
            # If the cached delta has no information about how it should be
            # reused, add the default reuse instruction according to the
            # revlog's configuration.
            if (
                self.delta_config.general_delta
                and self.delta_config.lazy_delta_base
            ):
                delta_base_reuse = DELTA_BASE_REUSE_TRY
            else:
                delta_base_reuse = DELTA_BASE_REUSE_NO
            cachedelta = (cachedelta[0], cachedelta[1], delta_base_reuse)

        revinfo = revlogutils.revisioninfo(
            node,
            p1,
            p2,
            btext,
            textlen,
            cachedelta,
            flags,
        )

        deltainfo = deltacomputer.finddeltainfo(revinfo)

        compression_mode = COMP_MODE_INLINE
        if self._docket is not None:
            default_comp = self._docket.default_compression_header
            r = deltautil.delta_compression(default_comp, deltainfo)
            compression_mode, deltainfo = r

        sidedata_compression_mode = COMP_MODE_INLINE
        if sidedata and self.feature_config.has_side_data:
            sidedata_compression_mode = COMP_MODE_PLAIN
            serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
            sidedata_offset = self._docket.sidedata_end
            h, comp_sidedata = self.compress(serialized_sidedata)
            if (
                h != b'u'
                and comp_sidedata[0:1] != b'\0'
                and len(comp_sidedata) < len(serialized_sidedata)
            ):
                assert not h
                if (
                    comp_sidedata[0:1]
                    == self._docket.default_compression_header
                ):
                    sidedata_compression_mode = COMP_MODE_DEFAULT
                    serialized_sidedata = comp_sidedata
                else:
                    sidedata_compression_mode = COMP_MODE_INLINE
                    serialized_sidedata = comp_sidedata
        else:
            serialized_sidedata = b""
            # Don't store the offset if the sidedata is empty, that way
            # we can easily detect empty sidedata and they will be no
            # different than ones we manually add.
            sidedata_offset = 0

        rank = RANK_UNKNOWN
        if self.feature_config.compute_rank:
            if (p1r, p2r) == (nullrev, nullrev):
                rank = 1
            elif p1r != nullrev and p2r == nullrev:
                rank = 1 + self.fast_rank(p1r)
            elif p1r == nullrev and p2r != nullrev:
                rank = 1 + self.fast_rank(p2r)
            else:  # merge node
                if rustdagop is not None and self.index.rust_ext_compat:
                    rank = rustdagop.rank(self.index, p1r, p2r)
                else:
                    pmin, pmax = sorted((p1r, p2r))
                    rank = 1 + self.fast_rank(pmax)
                    rank += sum(1 for _ in self.findmissingrevs([pmax], [pmin]))

        e = revlogutils.entry(
            flags=flags,
            data_offset=offset,
            data_compressed_length=deltainfo.deltalen,
            data_uncompressed_length=textlen,
            data_compression_mode=compression_mode,
            data_delta_base=deltainfo.base,
            link_rev=link,
            parent_rev_1=p1r,
            parent_rev_2=p2r,
            node_id=node,
            sidedata_offset=sidedata_offset,
            sidedata_compressed_length=len(serialized_sidedata),
            sidedata_compression_mode=sidedata_compression_mode,
            rank=rank,
        )

        self.index.append(e)
        entry = self.index.entry_binary(curr)
        if curr == 0 and self._docket is None:
            header = self._format_flags | self._format_version
            header = self.index.pack_header(header)
            entry = header + entry
        self._writeentry(
            transaction,
            entry,
            deltainfo.data,
            link,
            offset,
            serialized_sidedata,
            sidedata_offset,
        )

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo)

        if type(rawtext) == bytes:  # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return curr

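    # Illustrative sketch (not revlog code): the "rank" stored above is the
    # size of a revision's ancestor set, the revision itself included. For a
    # merge it is derived from the larger parent's rank plus the ancestors
    # reachable only through the smaller parent, which is what the
    # findmissingrevs([pmax], [pmin]) term counts. The helper below recomputes
    # the same quantity naively from a {rev: (p1, p2)} mapping; the name and
    # signature are invented for the example.
    def _example_rank(parents, rev):
        """Count the ancestors of ``rev`` (including ``rev``), where
        ``parents`` maps rev -> (p1, p2) and -1 means "no parent"."""
        seen = set()
        stack = [rev]
        while stack:
            r = stack.pop()
            if r == -1 or r in seen:
                continue
            seen.add(r)
            stack.extend(parents[r])
        return len(seen)

    # e.g. a linear chain 0 <- 1 <- 2 gives:
    #   _example_rank({0: (-1, -1), 1: (0, -1), 2: (1, -1)}, 2) == 3
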
    def _get_data_offset(self, prev):
        """Returns the current offset in the (in-transaction) data file.

        Versions < 2 of the revlog can get this in O(1), revlog v2 needs a
        docket file to store that information: since sidedata can be rewritten
        to the end of the data file within a transaction, you can have cases
        where, for example, rev `n` does not have sidedata while rev `n - 1`
        does, leading to `n - 1`'s sidedata being written after `n`'s data.

        TODO cache this in a docket file before getting out of experimental."""
        if self._docket is None:
            return self.end(prev)
        else:
            return self._docket.data_end

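    # A small sketch (not used by revlog) of the two cases handled by
    # _get_data_offset() above: without a docket, the next write position is
    # simply the end of the previous revision; with a docket, the recorded
    # in-transaction end of the data file is authoritative, because sidedata
    # rewrites can leave data physically out of revision order. Names are
    # illustrative.
    def _example_next_write_offset(docket_data_end, end_of_prev_rev):
        """Pick the append offset the way _get_data_offset() does."""
        if docket_data_end is None:  # revlog v0/v1: no docket file
            return end_of_prev_rev
        return docket_data_end  # revlog v2: trust the docket
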
    def _writeentry(
        self, transaction, entry, data, link, offset, sidedata, sidedata_offset
    ):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)
        ifh, dfh, sdfh = self._writinghandles
        if self._docket is None:
            ifh.seek(0, os.SEEK_END)
        else:
            ifh.seek(self._docket.index_end, os.SEEK_SET)
        if dfh:
            if self._docket is None:
                dfh.seek(0, os.SEEK_END)
            else:
                dfh.seek(self._docket.data_end, os.SEEK_SET)
        if sdfh:
            sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self._datafile, offset)
            if self._sidedatafile:
                transaction.add(self._sidedatafile, sidedata_offset)
            transaction.add(self._indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            if sidedata:
                sdfh.write(sidedata)
            ifh.write(entry)
        else:
            offset += curr * self.index.entry_size
            transaction.add(self._indexfile, offset)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            assert not sidedata
            self._enforceinlinesize(transaction)
        if self._docket is not None:
            # revlog-v2 always has 3 writing handles, help Pytype
            wh1 = self._writinghandles[0]
            wh2 = self._writinghandles[1]
            wh3 = self._writinghandles[2]
            assert wh1 is not None
            assert wh2 is not None
            assert wh3 is not None
            self._docket.index_end = wh1.tell()
            self._docket.data_end = wh2.tell()
            self._docket.sidedata_end = wh3.tell()

        nodemaputil.setup_persistent_nodemap(transaction, self)

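    # Illustrative sketch (not part of revlog) of the discipline used by
    # _writeentry() above: never trust the handle's current position, seek to
    # the intended offset before every write. The sketch uses 'rb+' rather
    # than append mode, since POSIX append mode ignores seeks on write; it
    # assumes ``path`` already exists.
    def _example_append_entry(path, payload, known_end=None):
        """Append ``payload`` to ``path`` after an explicit seek, mirroring
        the index/data/sidedata writes above."""
        with open(path, 'rb+') as fh:
            if known_end is None:
                fh.seek(0, os.SEEK_END)  # no docket: physical end of file
            else:
                fh.seek(known_end, os.SEEK_SET)  # docket: recorded end
            fh.write(payload)
            return fh.tell()  # new end, as written back into the docket
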
    def addgroup(
        self,
        deltas,
        linkmapper,
        transaction,
        alwayscache=False,
        addrevisioncb=None,
        duplicaterevisioncb=None,
        debug_info=None,
        delta_base_reuse_policy=None,
    ):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        if self._adding_group:
            raise error.ProgrammingError(b'cannot nest addgroup() calls')

        # read the default delta-base reuse policy from revlog config if the
        # group did not specify one.
        if delta_base_reuse_policy is None:
            if (
                self.delta_config.general_delta
                and self.delta_config.lazy_delta_base
            ):
                delta_base_reuse_policy = DELTA_BASE_REUSE_TRY
            else:
                delta_base_reuse_policy = DELTA_BASE_REUSE_NO

        self._adding_group = True
        empty = True
        try:
            with self._writing(transaction):
                write_debug = None
                if self.delta_config.debug_delta:
                    write_debug = transaction._report
                deltacomputer = deltautil.deltacomputer(
                    self,
                    write_debug=write_debug,
                    debug_info=debug_info,
                )
                # loop through our set of deltas
                for data in deltas:
                    (
                        node,
                        p1,
                        p2,
                        linknode,
                        deltabase,
                        delta,
                        flags,
                        sidedata,
                    ) = data
                    link = linkmapper(linknode)
                    flags = flags or REVIDX_DEFAULT_FLAGS

                    rev = self.index.get_rev(node)
                    if rev is not None:
                        # this can happen if two branches make the same change
                        self._nodeduplicatecallback(transaction, rev)
                        if duplicaterevisioncb:
                            duplicaterevisioncb(self, rev)
                        empty = False
                        continue

                    for p in (p1, p2):
                        if not self.index.has_node(p):
                            raise error.LookupError(
                                p, self.radix, _(b'unknown parent')
                            )

                    if not self.index.has_node(deltabase):
                        raise error.LookupError(
                            deltabase, self.display_id, _(b'unknown delta base')
                        )

                    baserev = self.rev(deltabase)

                    if baserev != nullrev and self.iscensored(baserev):
                        # if base is censored, delta must be full replacement
                        # in a single patch operation
                        hlen = struct.calcsize(b">lll")
                        oldlen = self.rawsize(baserev)
                        newlen = len(delta) - hlen
                        if delta[:hlen] != mdiff.replacediffheader(
                            oldlen, newlen
                        ):
                            raise error.CensoredBaseError(
                                self.display_id, self.node(baserev)
                            )

                    if not flags and self._peek_iscensored(baserev, delta):
                        flags |= REVIDX_ISCENSORED

                    # We assume consumers of addrevisioncb will want to
                    # retrieve the added revision, which will require a call
                    # to revision(). revision() will fast path if there is a
                    # cache hit. So, we tell _addrevision() to always cache in
                    # this case.
                    # We're only using addgroup() in the context of changegroup
                    # generation so the revision data can always be handled as
                    # raw by the flagprocessor.
                    rev = self._addrevision(
                        node,
                        None,
                        transaction,
                        link,
                        p1,
                        p2,
                        flags,
                        (baserev, delta, delta_base_reuse_policy),
                        alwayscache=alwayscache,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

                    if addrevisioncb:
                        addrevisioncb(self, rev)
                    empty = False
        finally:
            self._adding_group = False
        return not empty

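    # A minimal sketch (not revlog code) of the "full replacement" rule
    # enforced by addgroup() above when the delta base is censored: the delta
    # must be a single patch operation rewriting bytes 0..oldlen with newlen
    # bytes, i.e. it must start with the >lll header (start=0, end=oldlen,
    # newlen) that mdiff.replacediffheader() produces.
    def _example_is_full_replacement(delta, oldlen):
        """Check that ``delta`` replaces a censored base in one operation."""
        hlen = struct.calcsize(b">lll")
        if len(delta) < hlen:
            return False
        start, end, newlen = struct.unpack(b">lll", delta[:hlen])
        return start == 0 and end == oldlen and newlen == len(delta) - hlen
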
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self.feature_config.censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta):
        """Quickly check if a delta produces a censored revision."""
        if not self.feature_config.censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(
            minlink,
            len(self) - 1,
            self.headrevs(),
            self.linkrev,
            self.parentrevs,
        )

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        data_end = self.start(rev)
        if not self._inline:
            transaction.add(self._datafile, data_end)
            end = rev * self.index.entry_size
        else:
            end = data_end + (rev * self.index.entry_size)

        if self._sidedatafile:
            sidedata_end = self.sidedata_cut_off(rev)
            transaction.add(self._sidedatafile, sidedata_end)

        transaction.add(self._indexfile, end)
        if self._docket is not None:
            # XXX we could leverage the docket while stripping. However it is
            # not powerful enough at the time of this comment.
            self._docket.index_end = end
            self._docket.data_end = data_end
            self._docket.sidedata_end = sidedata_end
            self._docket.write(transaction, stripping=True)

        # then reset internal state in memory to forget those revisions
        self._revisioncache = None
        self._chaininfocache = util.lrucachedict(500)
        self._segmentfile.clear_cache()
        self._segmentfile_sidedata.clear_cache()

        del self.index[rev:-1]

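    # Illustrative sketch (not used by revlog) of the truncation offsets
    # computed in strip() above. A split revlog truncates the index and data
    # files independently; an inline revlog interleaves index entries and
    # data chunks in the .i file, so the cut point is the data offset of the
    # first stripped revision plus one index entry per surviving revision.
    def _example_truncation_points(rev, entry_size, data_start, inline):
        """Return (index_cutoff, data_cutoff) for stripping from ``rev``,
        where ``data_start`` is the data offset of revision ``rev``."""
        if not inline:
            return rev * entry_size, data_start
        # inline: everything lives in the .i file, no separate data cutoff
        return data_start + rev * entry_size, None
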
    def checksize(self):
        """Check size of index and data files

        return a (dd, di) tuple.
        - dd: extra bytes for the "data" file
        - di: extra bytes for the "index" file

        A healthy revlog will return (0, 0).
        """
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, io.SEEK_END)
                actual = f.tell()
            dd = actual - expected
        except FileNotFoundError:
            dd = 0

        try:
            f = self.opener(self._indexfile)
            f.seek(0, io.SEEK_END)
            actual = f.tell()
            f.close()
            s = self.index.entry_size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except FileNotFoundError:
            di = 0

        return (dd, di)

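    # A small sketch (not revlog code) of how a (dd, di) pair from
    # checksize() above reads in practice: both values are "actual size minus
    # expected size", so a positive value means trailing garbage (e.g. from
    # an interrupted transaction) and a negative one means truncation.
    def _example_report_checksize(dd, di):
        """Render a checksize() result the way a human would read it."""
        parts = []
        for name, delta in ((b'data', dd), (b'index', di)):
            if delta > 0:
                parts.append(b'%s: %d extra bytes' % (name, delta))
            elif delta < 0:
                parts.append(b'%s: %d missing bytes' % (name, -delta))
        return b', '.join(parts) or b'revlog is healthy'
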
    def files(self):
        res = [self._indexfile]
        if self._docket_file is None:
            if not self._inline:
                res.append(self._datafile)
        else:
            res.append(self._docket_file)
            res.extend(self._docket.old_index_filepaths(include_empty=False))
            if self._docket.data_end:
                res.append(self._datafile)
            res.extend(self._docket.old_data_filepaths(include_empty=False))
            if self._docket.sidedata_end:
                res.append(self._sidedatafile)
            res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
        return res

    def emitrevisions(
        self,
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltamode=repository.CG_DELTAMODE_STD,
        sidedata_helpers=None,
        debug_info=None,
    ):
        if nodesorder not in (b'nodes', b'storage', b'linear', None):
            raise error.ProgrammingError(
                b'unhandled value for nodesorder: %s' % nodesorder
            )

        if nodesorder is None and not self.delta_config.general_delta:
            nodesorder = b'storage'

        if (
            not self._storedeltachains
            and deltamode != repository.CG_DELTAMODE_PREV
        ):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self,
            nodes,
            nodesorder,
            revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self._candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
            sidedata_helpers=sidedata_helpers,
            debug_info=debug_info,
        )

    DELTAREUSEALWAYS = b'always'
    DELTAREUSESAMEREVS = b'samerevs'
    DELTAREUSENEVER = b'never'

    DELTAREUSEFULLADD = b'fulladd'

    DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}

    def clone(
        self,
        tr,
        destrevlog,
        addrevisioncb=None,
        deltareuse=DELTAREUSESAMEREVS,
        forcedeltabothparents=None,
        sidedata_helpers=None,
    ):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta
        encoding differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is
          the fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).
        DELTAREUSEFULLADD
          Revisions will be re-added as if they were new content. This is
          slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
          e.g. large file detection and handling.

        Delta computation can be slow, so the choice of delta reuse policy
        can significantly affect run time. (A sketch of how these policies
        map onto the delta configuration follows this method.)

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        the two extremes. Deltas will be reused if they are appropriate. But
        if the delta could choose a better revision, it will do so. This
        means if you are converting a non-generaldelta revlog to a
        generaldelta revlog, deltas will be recomputed if the delta's parent
        isn't a parent of the revision.

        In addition to the delta policy, the ``forcedeltabothparents``
        argument controls whether to force compute deltas against both
        parents for merges. If not set, the destination revlog's existing
        setting is used.

        See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
        `sidedata_helpers`.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(
                _(b'value for deltareuse invalid: %s') % deltareuse
            )

        if len(destrevlog):
            raise ValueError(_(b'destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_(b'source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_(b'destination revlog has filtered revisions'))

        # lazydelta and lazydeltabase control whether to reuse a cached delta,
        # if possible.
        old_delta_config = destrevlog.delta_config
        destrevlog.delta_config = destrevlog.delta_config.copy()

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog.delta_config.lazy_delta_base = True
                destrevlog.delta_config.lazy_delta = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog.delta_config.lazy_delta_base = False
                destrevlog.delta_config.lazy_delta = True
            elif deltareuse == self.DELTAREUSENEVER:
                destrevlog.delta_config.lazy_delta_base = False
                destrevlog.delta_config.lazy_delta = False

            delta_both_parents = (
                forcedeltabothparents or old_delta_config.delta_both_parents
            )
            destrevlog.delta_config.delta_both_parents = delta_both_parents

            with self.reading():
                self._clone(
                    tr,
                    destrevlog,
                    addrevisioncb,
                    deltareuse,
                    forcedeltabothparents,
                    sidedata_helpers,
                )

        finally:
            destrevlog.delta_config = old_delta_config

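    # Illustrative summary (not used by the code): how the ``deltareuse``
    # policies documented in clone() above map onto the two delta
    # configuration knobs that clone() flips. DELTAREUSEFULLADD is absent
    # because it bypasses this configuration and goes through addrevision()
    # instead.
    _EXAMPLE_DELTAREUSE_CONFIG = {
        DELTAREUSEALWAYS: {'lazy_delta': True, 'lazy_delta_base': True},
        DELTAREUSESAMEREVS: {'lazy_delta': True, 'lazy_delta_base': False},
        DELTAREUSENEVER: {'lazy_delta': False, 'lazy_delta_base': False},
    }
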
    def _clone(
        self,
        tr,
        destrevlog,
        addrevisioncb,
        deltareuse,
        forcedeltabothparents,
        sidedata_helpers,
    ):
        """perform the core duty of `revlog.clone` after parameter processing"""
        write_debug = None
        if self.delta_config.debug_delta:
            write_debug = tr._report
        deltacomputer = deltautil.deltacomputer(
            destrevlog,
            write_debug=write_debug,
        )
        index = self.index
        for rev in self:
            entry = index[rev]

            # Some classes override linkrev to take filtered revs into
            # account. Use raw entry from index.
            flags = entry[0] & 0xFFFF
            linkrev = entry[4]
            p1 = index[entry[5]][7]
            p2 = index[entry[6]][7]
            node = entry[7]

            # (Possibly) reuse the delta from the revlog if allowed and
            # the revlog chunk is a delta.
            cachedelta = None
            rawtext = None
            if deltareuse == self.DELTAREUSEFULLADD:
                text = self._revisiondata(rev)
                sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                destrevlog.addrevision(
                    text,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    cachedelta=cachedelta,
                    node=node,
                    flags=flags,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )
            else:
                if destrevlog.delta_config.lazy_delta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                sidedata = None
                if not cachedelta:
                    rawtext = self._revisiondata(rev)
                    sidedata = self.sidedata(rev)
                if sidedata is None:
                    sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                with destrevlog._writing(tr):
                    destrevlog._addrevision(
                        node,
                        rawtext,
                        tr,
                        linkrev,
                        p1,
                        p2,
                        flags,
                        cachedelta,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

            if addrevisioncb:
                addrevisioncb(self, rev, node)

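    # A minimal sketch (not revlog code) making the flag update in _clone()
    # explicit. Sidedata helpers return a (flags_to_add, flags_to_remove)
    # pair of bitmasks; since '&' binds tighter than '|', the expression
    # ``flags | new_flags[0] & ~new_flags[1]`` used above filters the removal
    # mask against the bits being added.
    def _example_apply_sidedata_flags(flags, new_flags):
        """Fold a (to_add, to_remove) bitmask pair into ``flags`` exactly as
        the expression in _clone() does."""
        to_add, to_remove = new_flags
        return flags | (to_add & ~to_remove)
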
3495 def censorrevision(self, tr, censornode, tombstone=b''):
3568 def censorrevision(self, tr, censornode, tombstone=b''):
3496 if self._format_version == REVLOGV0:
3569 if self._format_version == REVLOGV0:
3497 raise error.RevlogError(
3570 raise error.RevlogError(
3498 _(b'cannot censor with version %d revlogs')
3571 _(b'cannot censor with version %d revlogs')
3499 % self._format_version
3572 % self._format_version
3500 )
3573 )
3501 elif self._format_version == REVLOGV1:
3574 elif self._format_version == REVLOGV1:
3502 rewrite.v1_censor(self, tr, censornode, tombstone)
3575 rewrite.v1_censor(self, tr, censornode, tombstone)
3503 else:
3576 else:
3504 rewrite.v2_censor(self, tr, censornode, tombstone)
3577 rewrite.v2_censor(self, tr, censornode, tombstone)
3505
3578
3506 def verifyintegrity(self, state):
3579 def verifyintegrity(self, state):
3507 """Verifies the integrity of the revlog.
3580 """Verifies the integrity of the revlog.
3508
3581
3509 Yields ``revlogproblem`` instances describing problems that are
3582 Yields ``revlogproblem`` instances describing problems that are
3510 found.
3583 found.
3511 """
3584 """
3512 dd, di = self.checksize()
3585 dd, di = self.checksize()
3513 if dd:
3586 if dd:
3514 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3587 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3515 if di:
3588 if di:
3516 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3589 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3517
3590
3518 version = self._format_version
3591 version = self._format_version
3519
3592
3520 # The verifier tells us what version revlog we should be.
3593 # The verifier tells us what version revlog we should be.
3521 if version != state[b'expectedversion']:
3594 if version != state[b'expectedversion']:
3522 yield revlogproblem(
3595 yield revlogproblem(
3523 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3596 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3524 % (self.display_id, version, state[b'expectedversion'])
3597 % (self.display_id, version, state[b'expectedversion'])
3525 )
3598 )
3526
3599
3527 state[b'skipread'] = set()
3600 state[b'skipread'] = set()
3528 state[b'safe_renamed'] = set()
3601 state[b'safe_renamed'] = set()
3529
3602
3530 for rev in self:
3603 for rev in self:
3531 node = self.node(rev)
3604 node = self.node(rev)
3532
3605
3533 # Verify contents. 4 cases to care about:
3606 # Verify contents. 4 cases to care about:
3534 #
3607 #
3535 # common: the most common case
3608 # common: the most common case
3536 # rename: with a rename
3609 # rename: with a rename
3537 # meta: file content starts with b'\1\n', the metadata
3610 # meta: file content starts with b'\1\n', the metadata
3538 # header defined in filelog.py, but without a rename
3611 # header defined in filelog.py, but without a rename
3539 # ext: content stored externally
3612 # ext: content stored externally
3540 #
3613 #
3541 # More formally, their differences are shown below:
3614 # More formally, their differences are shown below:
3542 #
3615 #
3543 # | common | rename | meta | ext
3616 # | common | rename | meta | ext
3544 # -------------------------------------------------------
3617 # -------------------------------------------------------
3545 # flags() | 0 | 0 | 0 | not 0
3618 # flags() | 0 | 0 | 0 | not 0
3546 # renamed() | False | True | False | ?
3619 # renamed() | False | True | False | ?
3547 # rawtext[0:2]=='\1\n'| False | True | True | ?
3620 # rawtext[0:2]=='\1\n'| False | True | True | ?
3548 #
3621 #
3549 # "rawtext" means the raw text stored in revlog data, which
3622 # "rawtext" means the raw text stored in revlog data, which
3550 # could be retrieved by "rawdata(rev)". "text"
3623 # could be retrieved by "rawdata(rev)". "text"
3551 # mentioned below is "revision(rev)".
3624 # mentioned below is "revision(rev)".
3552 #
3625 #
3553 # There are 3 different lengths stored physically:
3626 # There are 3 different lengths stored physically:
3554 # 1. L1: rawsize, stored in revlog index
3627 # 1. L1: rawsize, stored in revlog index
3555 # 2. L2: len(rawtext), stored in revlog data
3628 # 2. L2: len(rawtext), stored in revlog data
3556 # 3. L3: len(text), stored in revlog data if flags==0, or
3629 # 3. L3: len(text), stored in revlog data if flags==0, or
3557 # possibly somewhere else if flags!=0
3630 # possibly somewhere else if flags!=0
3558 #
3631 #
3559 # L1 should be equal to L2. L3 could be different from them.
3632 # L1 should be equal to L2. L3 could be different from them.
3560 # "text" may or may not affect commit hash depending on flag
3633 # "text" may or may not affect commit hash depending on flag
3561 # processors (see flagutil.addflagprocessor).
3634 # processors (see flagutil.addflagprocessor).
3562 #
3635 #
3563 # | common | rename | meta | ext
3636 # | common | rename | meta | ext
3564 # -------------------------------------------------
3637 # -------------------------------------------------
3565 # rawsize() | L1 | L1 | L1 | L1
3638 # rawsize() | L1 | L1 | L1 | L1
3566 # size() | L1 | L2-LM | L1(*) | L1 (?)
3639 # size() | L1 | L2-LM | L1(*) | L1 (?)
3567 # len(rawtext) | L2 | L2 | L2 | L2
3640 # len(rawtext) | L2 | L2 | L2 | L2
3568 # len(text) | L2 | L2 | L2 | L3
3641 # len(text) | L2 | L2 | L2 | L3
3569 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3642 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3570 #
3643 #
3571 # LM: length of metadata, depending on rawtext
3644 # LM: length of metadata, depending on rawtext
3572 # (*): not ideal, see comment in filelog.size
3645 # (*): not ideal, see comment in filelog.size
3573 # (?): could be "- len(meta)" if the resolved content has
3646 # (?): could be "- len(meta)" if the resolved content has
3574 # rename metadata
3647 # rename metadata
3575 #
3648 #
3576 # Checks needed to be done:
3649 # Checks needed to be done:
3577 # 1. length check: L1 == L2, in all cases.
3650 # 1. length check: L1 == L2, in all cases.
3578 # 2. hash check: depending on flag processor, we may need to
3651 # 2. hash check: depending on flag processor, we may need to
3579 # use either "text" (external), or "rawtext" (in revlog).
3652 # use either "text" (external), or "rawtext" (in revlog).
3580
3653
            try:
                skipflags = state.get(b'skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                _verify_revision(self, skipflags, state, node)

                l1 = self.rawsize(rev)
                l2 = len(self.rawdata(node))

                if l1 != l2:
                    yield revlogproblem(
                        error=_(b'unpacked size is %d, %d expected') % (l2, l1),
                        node=node,
                    )

            except error.CensoredNodeError:
                if state[b'erroroncensored']:
                    yield revlogproblem(
                        error=_(b'censored file data'), node=node
                    )
                state[b'skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_(b'unpacking %s: %s')
                    % (short(node), stringutil.forcebytestr(e)),
                    node=node,
                )
                state[b'skipread'].add(node)

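    # Editorial sketch, not part of the original source: the generator
    # above is assumed to be drained by a verify driver along these lines
    # (`rl` and `ui` are hypothetical; revlogproblem carries `warning`,
    # `error` and `node` attributes, so a real driver would also check
    # which of the two message fields is set):
    #
    #     for problem in rl.verifyintegrity(state):
    #         ui.warn(b'%s: %s\n' % (short(problem.node), problem.error))
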
    def storageinfo(
        self,
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
        d = {}

        if exclusivefiles:
            d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
            if not self._inline:
                d[b'exclusivefiles'].append((self.opener, self._datafile))

        if sharedfiles:
            d[b'sharedfiles'] = []

        if revisionscount:
            d[b'revisionscount'] = len(self)

        if trackedsize:
            d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d[b'storedsize'] = sum(
                self.opener.stat(path).st_size for path in self.files()
            )

        return d

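    # Editorial usage sketch (assumption: `rl` is an open revlog). Each
    # field is computed only on request, since trackedsize iterates every
    # revision and storedsize stat()s every storage file:
    #
    #     info = rl.storageinfo(revisionscount=True, storedsize=True)
    #     # -> {b'revisionscount': <int>, b'storedsize': <bytes on disk>}
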
    def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
        if not self.feature_config.has_side_data:
            return
        # revlog formats with sidedata support do not support inline
        assert not self._inline
        if not helpers[1] and not helpers[2]:
            # Nothing to generate or remove
            return

        new_entries = []
        # append the new sidedata
        with self._writing(transaction):
            ifh, dfh, sdfh = self._writinghandles
            dfh.seek(self._docket.sidedata_end, os.SEEK_SET)

            current_offset = sdfh.tell()
            for rev in range(startrev, endrev + 1):
                entry = self.index[rev]
                new_sidedata, flags = sidedatautil.run_sidedata_helpers(
                    store=self,
                    sidedata_helpers=helpers,
                    sidedata={},
                    rev=rev,
                )

                serialized_sidedata = sidedatautil.serialize_sidedata(
                    new_sidedata
                )

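                # Editorial note (hedged reading of the COMP_MODE_*
                # constants): PLAIN stores the sidedata uncompressed,
                # DEFAULT stores it compressed with the docket's default
                # engine so the compression header can stay implicit, and
                # INLINE keeps the engine header inline with the data. The
                # block below picks the smallest representation that still
                # round-trips.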
                sidedata_compression_mode = COMP_MODE_INLINE
                if serialized_sidedata and self.feature_config.has_side_data:
                    sidedata_compression_mode = COMP_MODE_PLAIN
                    h, comp_sidedata = self.compress(serialized_sidedata)
                    if (
                        h != b'u'
                        and comp_sidedata[0] != b'\0'
                        and len(comp_sidedata) < len(serialized_sidedata)
                    ):
                        assert not h
                        if (
                            comp_sidedata[0]
                            == self._docket.default_compression_header
                        ):
                            sidedata_compression_mode = COMP_MODE_DEFAULT
                            serialized_sidedata = comp_sidedata
                        else:
                            sidedata_compression_mode = COMP_MODE_INLINE
                            serialized_sidedata = comp_sidedata
                if entry[8] != 0 or entry[9] != 0:
                    # rewriting entries that already have sidedata is not
                    # supported yet, because it introduces garbage data in the
                    # revlog.
                    msg = b"rewriting existing sidedata is not supported yet"
                    raise error.Abort(msg)

                # Apply (potential) flags to add and to remove after running
                # the sidedata helpers
                new_offset_flags = entry[0] | flags[0] & ~flags[1]
                entry_update = (
                    current_offset,
                    len(serialized_sidedata),
                    new_offset_flags,
                    sidedata_compression_mode,
                )

                # the sidedata computation might have moved the file cursors
                # around
                sdfh.seek(current_offset, os.SEEK_SET)
                sdfh.write(serialized_sidedata)
                new_entries.append(entry_update)
                current_offset += len(serialized_sidedata)
            self._docket.sidedata_end = sdfh.tell()

            # rewrite the new index entries
            ifh.seek(startrev * self.index.entry_size)
            for i, e in enumerate(new_entries):
                rev = startrev + i
                self.index.replace_sidedata_info(rev, *e)
                packed = self.index.entry_binary(rev)
                if rev == 0 and self._docket is None:
                    header = self._format_flags | self._format_version
                    header = self.index.pack_header(header)
                    packed = header + packed
                ifh.write(packed)
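
As an editorial aid, the compression-mode choice above can be distilled into a standalone sketch. Everything here is hypothetical scaffolding rather than Mercurial API: the helper name and the numeric constant values are stand-ins, `compress` is assumed to behave like the method's own compressor (returning a (header, data) pair where b'u' marks uncompressed storage), and one-byte slices are used where the method indexes bytes directly.

COMP_MODE_PLAIN, COMP_MODE_DEFAULT, COMP_MODE_INLINE = 0, 1, 2  # stand-ins


def pick_sidedata_mode(compress, default_header, serialized):
    # Hypothetical distillation of the decision in rewrite_sidedata above.
    if not serialized:
        # nothing to write; INLINE is the untouched default above
        return COMP_MODE_INLINE, serialized
    h, comp = compress(serialized)
    worthwhile = (
        h != b'u'  # the engine actually compressed the payload
        and comp[:1] != b'\0'  # output cannot be mistaken for plain data
        and len(comp) < len(serialized)  # and it actually saved space
    )
    if worthwhile:
        if comp[:1] == default_header:
            # matches the docket default: the header can stay implicit
            return COMP_MODE_DEFAULT, comp
        # otherwise keep the engine header inline with the data
        return COMP_MODE_INLINE, comp
    # compression did not pay off: store the sidedata uncompressed
    return COMP_MODE_PLAIN, serialized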