revlog: remove the `_indexfp` method...
marmoute
r51974:5ffee3cf default
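The commit removes the dedicated `_indexfp` helper and opens the index file directly through the vfs opener instead. Both call sites in the hunk below change the same way; a minimal sketch of the substitution (context abridged, see the full diff):

    # before: open the index file through the dedicated helper
    with self._indexfp() as fp:
        ...

    # after: open the index file directly via the opener
    with self.opener(self._indexfile, mode=b"r") as fp:
        ...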
@@ -1,3800 +1,3796 @@
# revlog.py - storage back-end for mercurial
# coding: utf8
#
# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""


import binascii
import collections
import contextlib
import io
import os
import struct
import weakref
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullrev,
    sha1nodeconstants,
    short,
    wdirrev,
)
from .i18n import _
from .revlogutils.constants import (
    ALL_KINDS,
    CHANGELOGV2,
    COMP_MODE_DEFAULT,
    COMP_MODE_INLINE,
    COMP_MODE_PLAIN,
    DELTA_BASE_REUSE_NO,
    DELTA_BASE_REUSE_TRY,
    ENTRY_RANK,
    FEATURES_BY_VERSION,
    FLAG_GENERALDELTA,
    FLAG_INLINE_DATA,
    INDEX_HEADER,
    KIND_CHANGELOG,
    KIND_FILELOG,
    RANK_UNKNOWN,
    REVLOGV0,
    REVLOGV1,
    REVLOGV1_FLAGS,
    REVLOGV2,
    REVLOGV2_FLAGS,
    REVLOG_DEFAULT_FLAGS,
    REVLOG_DEFAULT_FORMAT,
    REVLOG_DEFAULT_VERSION,
    SUPPORTED_FLAGS,
)
from .revlogutils.flagutil import (
    REVIDX_DEFAULT_FLAGS,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_FLAGS_ORDER,
    REVIDX_HASCOPIESINFO,
    REVIDX_ISCENSORED,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
)
from .thirdparty import attr
from . import (
    ancestor,
    dagop,
    error,
    mdiff,
    policy,
    pycompat,
    revlogutils,
    templatefilters,
    util,
)
from .interfaces import (
    repository,
    util as interfaceutil,
)
from .revlogutils import (
    deltas as deltautil,
    docket as docketutil,
    flagutil,
    nodemap as nodemaputil,
    randomaccessfile,
    revlogv0,
    rewrite,
    sidedata as sidedatautil,
)
from .utils import (
    storageutil,
    stringutil,
)

# blanked usage of all the names to prevent pyflakes constraints
# We need these names available in the module for extensions.

REVLOGV0
REVLOGV1
REVLOGV2
CHANGELOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_HASCOPIESINFO
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_RAWTEXT_CHANGING_FLAGS

parsers = policy.importmod('parsers')
rustancestor = policy.importrust('ancestor')
rustdagop = policy.importrust('dagop')
rustrevlog = policy.importrust('revlog')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of inline data embedded into a revlog
_maxinline = 131072

# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    return text, False


def ellipsiswriteprocessor(rl, text):
    return text, False


def ellipsisrawprocessor(rl, text):
    return False


ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)


def _verify_revision(rl, skipflags, state, node):
    """Verify the integrity of the given revlog ``node`` while providing a hook
    point for extensions to influence the operation."""
    if skipflags:
        state[b'skipread'].add(node)
    else:
        # Side-effect: read content and verify hash.
        rl.revision(node)


# True if a fast implementation for persistent-nodemap is available
#
# We also consider we have a "fast" implementation in "pure" python because
# people using pure don't really have performance considerations (and a
# wheelbarrow of other slowness sources)
HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or hasattr(
    parsers, 'BaseIndexObject'
)


@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta:
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    sidedata = attr.ib()
    protocol_flags = attr.ib()
    linknode = attr.ib(default=None)


@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem:
    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)


def parse_index_v1(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline)
    return index, cache


def parse_index_v2(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline, format=REVLOGV2)
    return index, cache


def parse_index_cl_v2(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline, format=CHANGELOGV2)
    return index, cache


if hasattr(parsers, 'parse_index_devel_nodemap'):

    def parse_index_v1_nodemap(data, inline):
        index, cache = parsers.parse_index_devel_nodemap(data, inline)
        return index, cache


else:
    parse_index_v1_nodemap = None


def parse_index_v1_mixed(data, inline):
    index, cache = parse_index_v1(data, inline)
    return rustrevlog.MixedIndex(index), cache


# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7FFFFFFF

FILE_TOO_SHORT_MSG = _(
    b'cannot read from revlog %s;'
    b' expected %d bytes from offset %d, data size is %d'
)

hexdigits = b'0123456789abcdefABCDEF'


class _Config:
    def copy(self):
        return self.__class__(**self.__dict__)


@attr.s()
class FeatureConfig(_Config):
    """Hold configuration values about the available revlog features"""

    # the default compression engine
    compression_engine = attr.ib(default=b'zlib')
    # compression engines options
    compression_engine_options = attr.ib(default=attr.Factory(dict))

    # can we use censor on this revlog
    censorable = attr.ib(default=False)
    # does this revlog use the "side data" feature
    has_side_data = attr.ib(default=False)
    # might remove rank configuration once the computation has no impact
    compute_rank = attr.ib(default=False)
    # parent order is supposed to be semantically irrelevant, so we
    # normally resort parents to ensure that the first parent is non-null,
    # if there is a non-null parent at all.
    # filelog abuses the parent order as flag to mark some instances of
    # meta-encoded files, so allow it to disable this behavior.
    canonical_parent_order = attr.ib(default=False)
    # can ellipsis commit be used
    enable_ellipsis = attr.ib(default=False)

    def copy(self):
        new = super().copy()
        new.compression_engine_options = self.compression_engine_options.copy()
        return new


@attr.s()
class DataConfig(_Config):
    """Hold configuration values about how the revlog data are read"""

    # should we try to open the "pending" version of the revlog
    try_pending = attr.ib(default=False)
    # should we try to open the "split" version of the revlog
    try_split = attr.ib(default=False)
    # When True, indexfile should be opened with checkambig=True at writing,
    # to avoid file stat ambiguity.
    check_ambig = attr.ib(default=False)

    # If true, use mmap instead of reading to deal with large index
    mmap_large_index = attr.ib(default=False)
    # how much data is large
    mmap_index_threshold = attr.ib(default=None)
    # How much data to read and cache into the raw revlog data cache.
    chunk_cache_size = attr.ib(default=65536)

    # Allow sparse reading of the revlog data
    with_sparse_read = attr.ib(default=False)
    # minimal density of a sparse read chunk
    sr_density_threshold = attr.ib(default=0.50)
    # minimal size of data we skip when performing sparse read
    sr_min_gap_size = attr.ib(default=262144)

    # are deltas encoded against arbitrary bases.
    generaldelta = attr.ib(default=False)


@attr.s()
class DeltaConfig(_Config):
    """Hold configuration values about how new deltas are computed

    Some attributes are duplicated from DataConfig to help having each object
    self-contained.
    """

    # can delta be encoded against arbitrary bases.
    general_delta = attr.ib(default=False)
    # Allow sparse writing of the revlog data
    sparse_revlog = attr.ib(default=False)
    # maximum length of a delta chain
    max_chain_len = attr.ib(default=None)
    # Maximum distance between delta chain base start and end
    max_deltachain_span = attr.ib(default=-1)
    # If `upper_bound_comp` is not None, this is the expected maximal gain from
    # compression for the data content.
    upper_bound_comp = attr.ib(default=None)
    # Should we try a delta against both parents
    delta_both_parents = attr.ib(default=True)
    # Test delta base candidate group by chunk of this maximal size.
    candidate_group_chunk_size = attr.ib(default=0)
    # Should we display debug information about delta computation
    debug_delta = attr.ib(default=False)
    # trust incoming delta by default
    lazy_delta = attr.ib(default=True)
    # trust the base of incoming delta by default
    lazy_delta_base = attr.ib(default=False)


class revlog:
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.

    If `upperboundcomp` is not None, this is the expected maximal gain from
    compression for the data content.

    `concurrencychecker` is an optional function that receives 3 arguments: a
    file handle, a filename, and an expected position. It should check whether
    the current position in the file handle is valid, and log/warn/fail (by
    raising).

    See mercurial/revlogutils/constants.py for details about the content of an
    index entry.
    """

    _flagserrorclass = error.RevlogError

    @staticmethod
    def is_inline_index(header_bytes):
        """Determine if a revlog is inline from the initial bytes of the index"""
        header = INDEX_HEADER.unpack(header_bytes)[0]

        _format_flags = header & ~0xFFFF
        _format_version = header & 0xFFFF

        features = FEATURES_BY_VERSION[_format_version]
        return features[b'inline'](_format_flags)

    def __init__(
        self,
        opener,
        target,
        radix,
        postfix=None,  # only exists for `tmpcensored` now
        checkambig=False,
        mmaplargeindex=False,
        censorable=False,
        upperboundcomp=None,
        persistentnodemap=False,
        concurrencychecker=None,
        trypending=False,
        try_split=False,
        canonical_parent_order=True,
    ):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.

        `target`: a (KIND, ID) tuple that identifies the content stored in
        this revlog. It helps the rest of the code to understand what the
        revlog is about without having to resort to heuristics and index
        filename analysis. Note that this must reliably be set by normal
        code, but that test, debug, or performance measurement code might
        not set this to an accurate value.
        """
        self.upperboundcomp = upperboundcomp

        self.radix = radix

        self._docket_file = None
        self._indexfile = None
        self._datafile = None
        self._sidedatafile = None
        self._nodemap_file = None
        self.postfix = postfix
        self._trypending = trypending
        self._try_split = try_split
        self.opener = opener
        if persistentnodemap:
            self._nodemap_file = nodemaputil.get_nodemap_file(self)

        assert target[0] in ALL_KINDS
        assert len(target) == 2
        self.target = target
        if b'feature-config' in self.opener.options:
            self.feature_config = self.opener.options[b'feature-config'].copy()
        else:
            self.feature_config = FeatureConfig()
        self.feature_config.censorable = censorable
        self.feature_config.canonical_parent_order = canonical_parent_order
        if b'data-config' in self.opener.options:
            self.data_config = self.opener.options[b'data-config'].copy()
        else:
            self.data_config = DataConfig()
        self.data_config.check_ambig = checkambig
        self.data_config.mmap_large_index = mmaplargeindex
        if b'delta-config' in self.opener.options:
            self.delta_config = self.opener.options[b'delta-config'].copy()
        else:
            self.delta_config = DeltaConfig()

        # 3-tuple of (node, rev, text) for a raw revision.
        self._revisioncache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, b'')

        self.index = None
        self._docket = None
        self._nodemap_docket = None
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}

        # other optional features

        # Make copy of flag processors so each revlog instance can support
        # custom flags.
        self._flagprocessors = dict(flagutil.flagprocessors)

        # 3-tuple of file handles being used for active writing.
        self._writinghandles = None
        # prevent nesting of addgroup
        self._adding_group = None

        self._loadindex()

        self._concurrencychecker = concurrencychecker

    @property
    def _generaldelta(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.delta_config.general_delta", b"6.6", stacklevel=2
        )
        return self.delta_config.general_delta

    @property
    def _checkambig(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.data_config.checkambig", b"6.6", stacklevel=2
        )
        return self.data_config.check_ambig

    @property
    def _mmaplargeindex(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.data_config.mmap_large_index", b"6.6", stacklevel=2
        )
        return self.data_config.mmap_large_index

    @property
    def _censorable(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.feature_config.censorable", b"6.6", stacklevel=2
        )
        return self.feature_config.censorable

    @property
    def _chunkcachesize(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.data_config.chunk_cache_size", b"6.6", stacklevel=2
        )
        return self.data_config.chunk_cache_size

    @property
    def _maxchainlen(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.delta_config.max_chain_len", b"6.6", stacklevel=2
        )
        return self.delta_config.max_chain_len

    @property
    def _deltabothparents(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.delta_config.delta_both_parents", b"6.6", stacklevel=2
        )
        return self.delta_config.delta_both_parents

    @property
    def _candidate_group_chunk_size(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.delta_config.candidate_group_chunk_size",
            b"6.6",
            stacklevel=2,
        )
        return self.delta_config.candidate_group_chunk_size

    @property
    def _debug_delta(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.delta_config.debug_delta", b"6.6", stacklevel=2
        )
        return self.delta_config.debug_delta

    @property
    def _compengine(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.feature_config.compression_engine",
            b"6.6",
            stacklevel=2,
        )
        return self.feature_config.compression_engine

    @property
    def _compengineopts(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.feature_config.compression_engine_options",
            b"6.6",
            stacklevel=2,
        )
        return self.feature_config.compression_engine_options

    @property
    def _maxdeltachainspan(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.delta_config.max_deltachain_span", b"6.6", stacklevel=2
        )
        return self.delta_config.max_deltachain_span

    @property
    def _withsparseread(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.data_config.with_sparse_read", b"6.6", stacklevel=2
        )
        return self.data_config.with_sparse_read

    @property
    def _sparserevlog(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.delta_config.sparse_revlog", b"6.6", stacklevel=2
        )
        return self.delta_config.sparse_revlog

    @property
    def hassidedata(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.feature_config.has_side_data", b"6.6", stacklevel=2
        )
        return self.feature_config.has_side_data

    @property
    def _srdensitythreshold(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.data_config.sr_density_threshold",
            b"6.6",
            stacklevel=2,
        )
        return self.data_config.sr_density_threshold

    @property
    def _srmingapsize(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.data_config.sr_min_gap_size", b"6.6", stacklevel=2
        )
        return self.data_config.sr_min_gap_size

    @property
    def _compute_rank(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.feature_config.compute_rank", b"6.6", stacklevel=2
        )
        return self.feature_config.compute_rank

    @property
    def canonical_parent_order(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.feature_config.canonical_parent_order",
            b"6.6",
            stacklevel=2,
        )
        return self.feature_config.canonical_parent_order

    @property
    def _lazydelta(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.delta_config.lazy_delta", b"6.6", stacklevel=2
        )
        return self.delta_config.lazy_delta

    @property
    def _lazydeltabase(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.delta_config.lazy_delta_base", b"6.6", stacklevel=2
        )
        return self.delta_config.lazy_delta_base

    def _init_opts(self):
        """process options (from above/config) to set up associated default revlog mode

        These values might be affected when actually reading on-disk information.

        The relevant values are returned for use in _loadindex().

        * newversionflags:
            version header to use if we need to create a new revlog

        * mmapindexthreshold:
            minimal index size at which to start using mmap

        * force_nodemap:
            force the usage of a "development" version of the nodemap code
        """
        opts = self.opener.options

        if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
            new_header = CHANGELOGV2
            compute_rank = opts.get(b'changelogv2.compute-rank', True)
            self.feature_config.compute_rank = compute_rank
        elif b'revlogv2' in opts:
            new_header = REVLOGV2
        elif b'revlogv1' in opts:
            new_header = REVLOGV1 | FLAG_INLINE_DATA
            if b'generaldelta' in opts:
                new_header |= FLAG_GENERALDELTA
        elif b'revlogv0' in self.opener.options:
            new_header = REVLOGV0
        else:
            new_header = REVLOG_DEFAULT_VERSION

        mmapindexthreshold = None
        if self.data_config.mmap_large_index:
            mmapindexthreshold = self.data_config.mmap_index_threshold
        if self.feature_config.enable_ellipsis:
            self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

        # revlog v0 doesn't have flag processors
        for flag, processor in opts.get(b'flagprocessors', {}).items():
            flagutil.insertflagprocessor(flag, processor, self._flagprocessors)

        chunk_cache_size = self.data_config.chunk_cache_size
        if chunk_cache_size <= 0:
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not greater than 0')
                % chunk_cache_size
            )
        elif chunk_cache_size & (chunk_cache_size - 1):
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not a power of 2')
                % chunk_cache_size
            )
        force_nodemap = opts.get(b'devel-force-nodemap', False)
        return new_header, mmapindexthreshold, force_nodemap

    def _get_data(self, filepath, mmap_threshold, size=None):
        """return a file content with or without mmap

        If the file is missing return the empty string"""
        try:
            with self.opener(filepath) as fp:
                if mmap_threshold is not None:
                    file_size = self.opener.fstat(fp).st_size
                    if file_size >= mmap_threshold:
                        if size is not None:
                            # avoid potential mmap crash
                            size = min(file_size, size)
                        # TODO: should .close() to release resources without
                        # relying on Python GC
                        if size is None:
                            return util.buffer(util.mmapread(fp))
                        else:
                            return util.buffer(util.mmapread(fp, size))
                if size is None:
                    return fp.read()
                else:
                    return fp.read(size)
        except FileNotFoundError:
            return b''

    def get_streams(self, max_linkrev, force_inline=False):
        """return a list of streams that represent this revlog

        This is used by stream-clone to do bytes to bytes copies of a repository.

        This streams data for all revisions that refer to a changelog revision up
        to `max_linkrev`.

        If `force_inline` is set, it enforces that the stream will represent an inline revlog.

        It returns a list of three-tuples:

        [
            (filename, bytes_stream, stream_size),
            …
        ]
        """
        n = len(self)
        index = self.index
        while n > 0:
            linkrev = index[n - 1][4]
            if linkrev < max_linkrev:
                break
            # note: this loop will rarely go through multiple iterations, since
            # it only traverses commits created during the current streaming
            # pull operation.
            #
            # If this becomes a problem, using a binary search should cap the
            # runtime of this.
            n = n - 1
        if n == 0:
            # no data to send
            return []
        index_size = n * index.entry_size
        data_size = self.end(n - 1)

        # XXX we might have been split (or stripped) since the object
        # initialization. We need to close this race too, e.g. by having a way
        # to pre-open the files we feed to the revlog and never closing them
        # before we are done streaming.

        if self._inline:

            def get_stream():
-                with self._indexfp() as fp:
+                with self.opener(self._indexfile, mode=b"r") as fp:
                    yield None
                    size = index_size + data_size
                    if size <= 65536:
                        yield fp.read(size)
                    else:
                        yield from util.filechunkiter(fp, limit=size)

            inline_stream = get_stream()
            next(inline_stream)
            return [
                (self._indexfile, inline_stream, index_size + data_size),
            ]
        elif force_inline:

            def get_stream():
                with self.reading():
                    yield None

                    for rev in range(n):
                        idx = self.index.entry_binary(rev)
                        if rev == 0 and self._docket is None:
                            # re-inject the inline flag
                            header = self._format_flags
                            header |= self._format_version
                            header |= FLAG_INLINE_DATA
                            header = self.index.pack_header(header)
                            idx = header + idx
                        yield idx
                        yield self._getsegmentforrevs(rev, rev)[1]

            inline_stream = get_stream()
            next(inline_stream)
            return [
                (self._indexfile, inline_stream, index_size + data_size),
            ]
        else:

            def get_index_stream():
-                with self._indexfp() as fp:
+                with self.opener(self._indexfile, mode=b"r") as fp:
                    yield None
                    if index_size <= 65536:
                        yield fp.read(index_size)
                    else:
                        yield from util.filechunkiter(fp, limit=index_size)

            def get_data_stream():
                with self._datafp() as fp:
                    yield None
                    if data_size <= 65536:
                        yield fp.read(data_size)
                    else:
                        yield from util.filechunkiter(fp, limit=data_size)

            index_stream = get_index_stream()
            next(index_stream)
            data_stream = get_data_stream()
            next(data_stream)
            return [
                (self._datafile, data_stream, data_size),
                (self._indexfile, index_stream, index_size),
            ]

    def _loadindex(self, docket=None):

        new_header, mmapindexthreshold, force_nodemap = self._init_opts()

        if self.postfix is not None:
            entry_point = b'%s.i.%s' % (self.radix, self.postfix)
        elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
            entry_point = b'%s.i.a' % self.radix
        elif self._try_split and self.opener.exists(self._split_index_file):
            entry_point = self._split_index_file
        else:
            entry_point = b'%s.i' % self.radix

        if docket is not None:
            self._docket = docket
            self._docket_file = entry_point
        else:
            self._initempty = True
            entry_data = self._get_data(entry_point, mmapindexthreshold)
            if len(entry_data) > 0:
                header = INDEX_HEADER.unpack(entry_data[:4])[0]
                self._initempty = False
            else:
                header = new_header

            self._format_flags = header & ~0xFFFF
            self._format_version = header & 0xFFFF

            supported_flags = SUPPORTED_FLAGS.get(self._format_version)
            if supported_flags is None:
                msg = _(b'unknown version (%d) in revlog %s')
                msg %= (self._format_version, self.display_id)
                raise error.RevlogError(msg)
            elif self._format_flags & ~supported_flags:
                msg = _(b'unknown flags (%#04x) in version %d revlog %s')
                display_flag = self._format_flags >> 16
                msg %= (display_flag, self._format_version, self.display_id)
                raise error.RevlogError(msg)

            features = FEATURES_BY_VERSION[self._format_version]
            self._inline = features[b'inline'](self._format_flags)
            self.delta_config.general_delta = features[b'generaldelta'](
                self._format_flags
            )
            self.feature_config.has_side_data = features[b'sidedata']

            if not features[b'docket']:
                self._indexfile = entry_point
                index_data = entry_data
            else:
                self._docket_file = entry_point
                if self._initempty:
                    self._docket = docketutil.default_docket(self, header)
                else:
                    self._docket = docketutil.parse_docket(
                        self, entry_data, use_pending=self._trypending
                    )

        if self._docket is not None:
            self._indexfile = self._docket.index_filepath()
            index_data = b''
            index_size = self._docket.index_end
            if index_size > 0:
                index_data = self._get_data(
                    self._indexfile, mmapindexthreshold, size=index_size
                )
                if len(index_data) < index_size:
                    msg = _(b'too few index data for %s: got %d, expected %d')
                    msg %= (self.display_id, len(index_data), index_size)
                    raise error.RevlogError(msg)

            self._inline = False
            # generaldelta implied by version 2 revlogs.
            self.delta_config.general_delta = True
            # the logic for persistent nodemap will be dealt with within the
            # main docket, so disable it for now.
            self._nodemap_file = None

        if self._docket is not None:
            self._datafile = self._docket.data_filepath()
            self._sidedatafile = self._docket.sidedata_filepath()
        elif self.postfix is None:
            self._datafile = b'%s.d' % self.radix
        else:
            self._datafile = b'%s.d.%s' % (self.radix, self.postfix)

        self.nodeconstants = sha1nodeconstants
        self.nullid = self.nodeconstants.nullid

        # sparse-revlog can't be on without general-delta (issue6056)
        if not self.delta_config.general_delta:
            self.delta_config.sparse_revlog = False

        self._storedeltachains = True

        devel_nodemap = (
            self._nodemap_file
            and force_nodemap
            and parse_index_v1_nodemap is not None
        )

        use_rust_index = False
        if rustrevlog is not None:
            if self._nodemap_file is not None:
                use_rust_index = True
            else:
                use_rust_index = self.opener.options.get(b'rust.index')

        self._parse_index = parse_index_v1
        if self._format_version == REVLOGV0:
            self._parse_index = revlogv0.parse_index_v0
        elif self._format_version == REVLOGV2:
            self._parse_index = parse_index_v2
        elif self._format_version == CHANGELOGV2:
            self._parse_index = parse_index_cl_v2
        elif devel_nodemap:
            self._parse_index = parse_index_v1_nodemap
        elif use_rust_index:
            self._parse_index = parse_index_v1_mixed
        try:
            d = self._parse_index(index_data, self._inline)
            index, chunkcache = d
            use_nodemap = (
                not self._inline
                and self._nodemap_file is not None
                and hasattr(index, 'update_nodemap_data')
            )
            if use_nodemap:
                nodemap_data = nodemaputil.persisted_data(self)
                if nodemap_data is not None:
                    docket = nodemap_data[0]
989 docket = nodemap_data[0]
990 if (
990 if (
991 len(d[0]) > docket.tip_rev
991 len(d[0]) > docket.tip_rev
992 and d[0][docket.tip_rev][7] == docket.tip_node
992 and d[0][docket.tip_rev][7] == docket.tip_node
993 ):
993 ):
994 # no changelog tampering
994 # no changelog tampering
995 self._nodemap_docket = docket
995 self._nodemap_docket = docket
996 index.update_nodemap_data(*nodemap_data)
996 index.update_nodemap_data(*nodemap_data)
997 except (ValueError, IndexError):
997 except (ValueError, IndexError):
998 raise error.RevlogError(
998 raise error.RevlogError(
999 _(b"index %s is corrupted") % self.display_id
999 _(b"index %s is corrupted") % self.display_id
1000 )
1000 )
1001 self.index = index
1001 self.index = index
1002 self._segmentfile = randomaccessfile.randomaccessfile(
1002 self._segmentfile = randomaccessfile.randomaccessfile(
1003 self.opener,
1003 self.opener,
1004 (self._indexfile if self._inline else self._datafile),
1004 (self._indexfile if self._inline else self._datafile),
1005 self.data_config.chunk_cache_size,
1005 self.data_config.chunk_cache_size,
1006 chunkcache,
1006 chunkcache,
1007 )
1007 )
1008 self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
1008 self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
1009 self.opener,
1009 self.opener,
1010 self._sidedatafile,
1010 self._sidedatafile,
1011 self.data_config.chunk_cache_size,
1011 self.data_config.chunk_cache_size,
1012 )
1012 )
1013 # revnum -> (chain-length, sum-delta-length)
1013 # revnum -> (chain-length, sum-delta-length)
1014 self._chaininfocache = util.lrucachedict(500)
1014 self._chaininfocache = util.lrucachedict(500)
1015 # revlog header -> revlog compressor
1015 # revlog header -> revlog compressor
1016 self._decompressors = {}
1016 self._decompressors = {}
1017
1017
1018 def get_revlog(self):
1018 def get_revlog(self):
1019 """simple function to mirror the API of other not-really-revlog objects"""
1019 """simple function to mirror the API of other not-really-revlog objects"""
1020 return self
1020 return self
1021
1021
1022 @util.propertycache
1022 @util.propertycache
1023 def revlog_kind(self):
1023 def revlog_kind(self):
1024 return self.target[0]
1024 return self.target[0]
1025
1025
1026 @util.propertycache
1026 @util.propertycache
1027 def display_id(self):
1027 def display_id(self):
1028 """The public facing "ID" of the revlog that we use in messages"""
1028 """The public facing "ID" of the revlog that we use in messages"""
1029 if self.revlog_kind == KIND_FILELOG:
1029 if self.revlog_kind == KIND_FILELOG:
1030 # Reference the file without the "data/" prefix, so it is familiar
1030 # Reference the file without the "data/" prefix, so it is familiar
1031 # to the user.
1031 # to the user.
1032 return self.target[1]
1032 return self.target[1]
1033 else:
1033 else:
1034 return self.radix
1034 return self.radix
1035
1035
1036 def _get_decompressor(self, t):
1036 def _get_decompressor(self, t):
1037 try:
1037 try:
1038 compressor = self._decompressors[t]
1038 compressor = self._decompressors[t]
1039 except KeyError:
1039 except KeyError:
1040 try:
1040 try:
1041 engine = util.compengines.forrevlogheader(t)
1041 engine = util.compengines.forrevlogheader(t)
1042 compressor = engine.revlogcompressor(
1042 compressor = engine.revlogcompressor(
1043 self.feature_config.compression_engine_options
1043 self.feature_config.compression_engine_options
1044 )
1044 )
1045 self._decompressors[t] = compressor
1045 self._decompressors[t] = compressor
1046 except KeyError:
1046 except KeyError:
1047 raise error.RevlogError(
1047 raise error.RevlogError(
1048 _(b'unknown compression type %s') % binascii.hexlify(t)
1048 _(b'unknown compression type %s') % binascii.hexlify(t)
1049 )
1049 )
1050 return compressor
1050 return compressor
1051
1051
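Editor's note: _get_decompressor is a lazy cache keyed by the compression header byte found at the front of each chunk. A reduced sketch of the same pattern, using only zlib (whose streams start with b'x', which is how classic revlog chunks are recognized); the engine-registry lookup of the real code is elided:

    import zlib

    _cache = {}

    def get_decompressor(header):
        # build the decompressor on first use, then reuse it
        if header not in _cache:
            if header == b'x':  # the first byte of a zlib stream
                _cache[header] = zlib.decompress
            else:
                raise KeyError('unknown compression type %r' % header)
        return _cache[header]

    assert get_decompressor(b'x')(zlib.compress(b'data')) == b'data'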
1052 @util.propertycache
1052 @util.propertycache
1053 def _compressor(self):
1053 def _compressor(self):
1054 engine = util.compengines[self.feature_config.compression_engine]
1054 engine = util.compengines[self.feature_config.compression_engine]
1055 return engine.revlogcompressor(
1055 return engine.revlogcompressor(
1056 self.feature_config.compression_engine_options
1056 self.feature_config.compression_engine_options
1057 )
1057 )
1058
1058
1059 @util.propertycache
1059 @util.propertycache
1060 def _decompressor(self):
1060 def _decompressor(self):
1061 """the default decompressor"""
1061 """the default decompressor"""
1062 if self._docket is None:
1062 if self._docket is None:
1063 return None
1063 return None
1064 t = self._docket.default_compression_header
1064 t = self._docket.default_compression_header
1065 c = self._get_decompressor(t)
1065 c = self._get_decompressor(t)
1066 return c.decompress
1066 return c.decompress
1067
1067
1068 def _indexfp(self):
1069 """file object for the revlog's index file"""
1070 return self.opener(self._indexfile, mode=b"r")
1071
1072 def __index_write_fp(self):
1068 def __index_write_fp(self):
1073 # You should not use this directly; use `_writing` instead
1069 # You should not use this directly; use `_writing` instead
1074 try:
1070 try:
1075 f = self.opener(
1071 f = self.opener(
1076 self._indexfile,
1072 self._indexfile,
1077 mode=b"r+",
1073 mode=b"r+",
1078 checkambig=self.data_config.check_ambig,
1074 checkambig=self.data_config.check_ambig,
1079 )
1075 )
1080 if self._docket is None:
1076 if self._docket is None:
1081 f.seek(0, os.SEEK_END)
1077 f.seek(0, os.SEEK_END)
1082 else:
1078 else:
1083 f.seek(self._docket.index_end, os.SEEK_SET)
1079 f.seek(self._docket.index_end, os.SEEK_SET)
1084 return f
1080 return f
1085 except FileNotFoundError:
1081 except FileNotFoundError:
1086 return self.opener(
1082 return self.opener(
1087 self._indexfile,
1083 self._indexfile,
1088 mode=b"w+",
1084 mode=b"w+",
1089 checkambig=self.data_config.check_ambig,
1085 checkambig=self.data_config.check_ambig,
1090 )
1086 )
1091
1087
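Editor's note: a simplified sketch of the open-for-append dance in __index_write_fp, minus the checkambig and docket handling of the vfs-based original: update the existing file in place and seek to the append point, creating the file only when it does not exist yet:

    import os

    def open_for_append(path):
        try:
            f = open(path, 'r+b')    # update the existing file in place
            f.seek(0, os.SEEK_END)   # append after current content
            return f
        except FileNotFoundError:
            return open(path, 'w+b') # first write: create the file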
1092 def __index_new_fp(self):
1088 def __index_new_fp(self):
1093 # You should not use this unless you are upgrading from an inline revlog
1089 # You should not use this unless you are upgrading from an inline revlog
1094 return self.opener(
1090 return self.opener(
1095 self._indexfile,
1091 self._indexfile,
1096 mode=b"w",
1092 mode=b"w",
1097 checkambig=self.data_config.check_ambig,
1093 checkambig=self.data_config.check_ambig,
1098 atomictemp=True,
1094 atomictemp=True,
1099 )
1095 )
1100
1096
1101 def _datafp(self, mode=b'r'):
1097 def _datafp(self, mode=b'r'):
1102 """file object for the revlog's data file"""
1098 """file object for the revlog's data file"""
1103 return self.opener(self._datafile, mode=mode)
1099 return self.opener(self._datafile, mode=mode)
1104
1100
1105 @contextlib.contextmanager
1101 @contextlib.contextmanager
1106 def _sidedatareadfp(self):
1102 def _sidedatareadfp(self):
1107 """file object suitable to read sidedata"""
1103 """file object suitable to read sidedata"""
1108 if self._writinghandles:
1104 if self._writinghandles:
1109 yield self._writinghandles[2]
1105 yield self._writinghandles[2]
1110 else:
1106 else:
1111 with self.opener(self._sidedatafile) as fp:
1107 with self.opener(self._sidedatafile) as fp:
1112 yield fp
1108 yield fp
1113
1109
1114 def tiprev(self):
1110 def tiprev(self):
1115 return len(self.index) - 1
1111 return len(self.index) - 1
1116
1112
1117 def tip(self):
1113 def tip(self):
1118 return self.node(self.tiprev())
1114 return self.node(self.tiprev())
1119
1115
1120 def __contains__(self, rev):
1116 def __contains__(self, rev):
1121 return 0 <= rev < len(self)
1117 return 0 <= rev < len(self)
1122
1118
1123 def __len__(self):
1119 def __len__(self):
1124 return len(self.index)
1120 return len(self.index)
1125
1121
1126 def __iter__(self):
1122 def __iter__(self):
1127 return iter(range(len(self)))
1123 return iter(range(len(self)))
1128
1124
1129 def revs(self, start=0, stop=None):
1125 def revs(self, start=0, stop=None):
1130 """iterate over all revs in this revlog (from start to stop)"""
1126 """iterate over all revs in this revlog (from start to stop)"""
1131 return storageutil.iterrevs(len(self), start=start, stop=stop)
1127 return storageutil.iterrevs(len(self), start=start, stop=stop)
1132
1128
1133 def hasnode(self, node):
1129 def hasnode(self, node):
1134 try:
1130 try:
1135 self.rev(node)
1131 self.rev(node)
1136 return True
1132 return True
1137 except KeyError:
1133 except KeyError:
1138 return False
1134 return False
1139
1135
1140 def _candelta(self, baserev, rev):
1136 def _candelta(self, baserev, rev):
1141 """whether two revisions (baserev, rev) can be delta-ed or not"""
1137 """whether two revisions (baserev, rev) can be delta-ed or not"""
1142 # Disable delta if either rev requires a content-changing flag
1138 # Disable delta if either rev requires a content-changing flag
1143 # processor (ex. LFS). This is because such flag processor can alter
1139 # processor (ex. LFS). This is because such flag processor can alter
1144 # the rawtext content that the delta will be based on, and two clients
1140 # the rawtext content that the delta will be based on, and two clients
1145 # could have the same revlog node with different flags (i.e. different
1141 # could have the same revlog node with different flags (i.e. different
1146 # rawtext contents) and the delta could be incompatible.
1142 # rawtext contents) and the delta could be incompatible.
1147 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
1143 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
1148 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
1144 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
1149 ):
1145 ):
1150 return False
1146 return False
1151 return True
1147 return True
1152
1148
1153 def update_caches(self, transaction):
1149 def update_caches(self, transaction):
1154 """update the on-disk cache
1150 """update the on-disk cache
1155
1151
1156 If a transaction is passed, the update may be delayed until transaction
1152 If a transaction is passed, the update may be delayed until transaction
1157 commit."""
1153 commit."""
1158 if self._nodemap_file is not None:
1154 if self._nodemap_file is not None:
1159 if transaction is None:
1155 if transaction is None:
1160 nodemaputil.update_persistent_nodemap(self)
1156 nodemaputil.update_persistent_nodemap(self)
1161 else:
1157 else:
1162 nodemaputil.setup_persistent_nodemap(transaction, self)
1158 nodemaputil.setup_persistent_nodemap(transaction, self)
1163
1159
1164 def clearcaches(self):
1160 def clearcaches(self):
1165 """Clear in-memory caches"""
1161 """Clear in-memory caches"""
1166 self._revisioncache = None
1162 self._revisioncache = None
1167 self._chainbasecache.clear()
1163 self._chainbasecache.clear()
1168 self._segmentfile.clear_cache()
1164 self._segmentfile.clear_cache()
1169 self._segmentfile_sidedata.clear_cache()
1165 self._segmentfile_sidedata.clear_cache()
1170 self._pcache = {}
1166 self._pcache = {}
1171 self._nodemap_docket = None
1167 self._nodemap_docket = None
1172 self.index.clearcaches()
1168 self.index.clearcaches()
1173 # The python code is the one responsible for validating the docket, so we
1169 # The python code is the one responsible for validating the docket, so we
1174 # end up having to refresh it here.
1170 # end up having to refresh it here.
1175 use_nodemap = (
1171 use_nodemap = (
1176 not self._inline
1172 not self._inline
1177 and self._nodemap_file is not None
1173 and self._nodemap_file is not None
1178 and hasattr(self.index, 'update_nodemap_data')
1174 and hasattr(self.index, 'update_nodemap_data')
1179 )
1175 )
1180 if use_nodemap:
1176 if use_nodemap:
1181 nodemap_data = nodemaputil.persisted_data(self)
1177 nodemap_data = nodemaputil.persisted_data(self)
1182 if nodemap_data is not None:
1178 if nodemap_data is not None:
1183 self._nodemap_docket = nodemap_data[0]
1179 self._nodemap_docket = nodemap_data[0]
1184 self.index.update_nodemap_data(*nodemap_data)
1180 self.index.update_nodemap_data(*nodemap_data)
1185
1181
1186 def rev(self, node):
1182 def rev(self, node):
1187 """return the revision number associated with a <nodeid>"""
1183 """return the revision number associated with a <nodeid>"""
1188 try:
1184 try:
1189 return self.index.rev(node)
1185 return self.index.rev(node)
1190 except TypeError:
1186 except TypeError:
1191 raise
1187 raise
1192 except error.RevlogError:
1188 except error.RevlogError:
1193 # parsers.c radix tree lookup failed
1189 # parsers.c radix tree lookup failed
1194 if (
1190 if (
1195 node == self.nodeconstants.wdirid
1191 node == self.nodeconstants.wdirid
1196 or node in self.nodeconstants.wdirfilenodeids
1192 or node in self.nodeconstants.wdirfilenodeids
1197 ):
1193 ):
1198 raise error.WdirUnsupported
1194 raise error.WdirUnsupported
1199 raise error.LookupError(node, self.display_id, _(b'no node'))
1195 raise error.LookupError(node, self.display_id, _(b'no node'))
1200
1196
1201 # Accessors for index entries.
1197 # Accessors for index entries.
1202
1198
1203 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
1199 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
1204 # are flags.
1200 # are flags.
1205 def start(self, rev):
1201 def start(self, rev):
1206 return int(self.index[rev][0] >> 16)
1202 return int(self.index[rev][0] >> 16)
1207
1203
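Editor's note: the packed first index field that start() and flags() decode can be reproduced in a couple of lines. A toy round-trip, following the 48-bit-offset / 16-bit-flags split described in the comment above:

    def pack_offset_flags(offset, flags):
        # 6 bytes of offset, 2 bytes of flags, as described above
        assert 0 <= offset < (1 << 48) and 0 <= flags < (1 << 16)
        return (offset << 16) | flags

    entry0 = pack_offset_flags(123456, 0x0003)
    assert entry0 >> 16 == 123456     # what start() extracts
    assert entry0 & 0xFFFF == 0x0003  # what flags() extracts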
1208 def sidedata_cut_off(self, rev):
1204 def sidedata_cut_off(self, rev):
1209 sd_cut_off = self.index[rev][8]
1205 sd_cut_off = self.index[rev][8]
1210 if sd_cut_off != 0:
1206 if sd_cut_off != 0:
1211 return sd_cut_off
1207 return sd_cut_off
1212 # This is some annoying dance, because entries without sidedata
1208 # This is some annoying dance, because entries without sidedata
1213 # currently use 0 as their offset. (instead of previous-offset +
1209 # currently use 0 as their offset. (instead of previous-offset +
1214 # previous-size)
1210 # previous-size)
1215 #
1211 #
1216 # We should reconsider this sidedata → 0 sidedata_offset policy.
1212 # We should reconsider this sidedata → 0 sidedata_offset policy.
1217 # In the meantime, we need this.
1213 # In the meantime, we need this.
1218 while 0 <= rev:
1214 while 0 <= rev:
1219 e = self.index[rev]
1215 e = self.index[rev]
1220 if e[9] != 0:
1216 if e[9] != 0:
1221 return e[8] + e[9]
1217 return e[8] + e[9]
1222 rev -= 1
1218 rev -= 1
1223 return 0
1219 return 0
1224
1220
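Editor's note: the fallback walk in sidedata_cut_off, isolated as a toy. Entries stand in for index slots 8 and 9 as (offset, size) pairs; entries without sidedata store offset 0, so the code scans backwards for the end of the last real chunk:

    def sidedata_cut_off(entries, rev):
        offset, size = entries[rev]
        if offset != 0:
            return offset
        while rev >= 0:              # walk back to the last real chunk
            offset, size = entries[rev]
            if size != 0:
                return offset + size # previous-offset + previous-size
            rev -= 1
        return 0

    entries = [(0, 10), (0, 0), (15, 5)]  # rev 1 carries no sidedata
    assert sidedata_cut_off(entries, 1) == 10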
1225 def flags(self, rev):
1221 def flags(self, rev):
1226 return self.index[rev][0] & 0xFFFF
1222 return self.index[rev][0] & 0xFFFF
1227
1223
1228 def length(self, rev):
1224 def length(self, rev):
1229 return self.index[rev][1]
1225 return self.index[rev][1]
1230
1226
1231 def sidedata_length(self, rev):
1227 def sidedata_length(self, rev):
1232 if not self.feature_config.has_side_data:
1228 if not self.feature_config.has_side_data:
1233 return 0
1229 return 0
1234 return self.index[rev][9]
1230 return self.index[rev][9]
1235
1231
1236 def rawsize(self, rev):
1232 def rawsize(self, rev):
1237 """return the length of the uncompressed text for a given revision"""
1233 """return the length of the uncompressed text for a given revision"""
1238 l = self.index[rev][2]
1234 l = self.index[rev][2]
1239 if l >= 0:
1235 if l >= 0:
1240 return l
1236 return l
1241
1237
1242 t = self.rawdata(rev)
1238 t = self.rawdata(rev)
1243 return len(t)
1239 return len(t)
1244
1240
1245 def size(self, rev):
1241 def size(self, rev):
1246 """length of non-raw text (processed by a "read" flag processor)"""
1242 """length of non-raw text (processed by a "read" flag processor)"""
1247 # fast path: if no "read" flag processor could change the content,
1243 # fast path: if no "read" flag processor could change the content,
1248 # size is rawsize. note: ELLIPSIS is known to not change the content.
1244 # size is rawsize. note: ELLIPSIS is known to not change the content.
1249 flags = self.flags(rev)
1245 flags = self.flags(rev)
1250 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
1246 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
1251 return self.rawsize(rev)
1247 return self.rawsize(rev)
1252
1248
1253 return len(self.revision(rev))
1249 return len(self.revision(rev))
1254
1250
1255 def fast_rank(self, rev):
1251 def fast_rank(self, rev):
1256 """Return the rank of a revision if already known, or None otherwise.
1252 """Return the rank of a revision if already known, or None otherwise.
1257
1253
1258 The rank of a revision is the size of the sub-graph it defines as a
1254 The rank of a revision is the size of the sub-graph it defines as a
1259 head. Equivalently, the rank of a revision `r` is the size of the set
1255 head. Equivalently, the rank of a revision `r` is the size of the set
1260 `ancestors(r)`, `r` included.
1256 `ancestors(r)`, `r` included.
1261
1257
1262 This method returns the rank retrieved from the revlog in constant
1258 This method returns the rank retrieved from the revlog in constant
1263 time. It makes no attempt at computing unknown values for versions of
1259 time. It makes no attempt at computing unknown values for versions of
1264 the revlog which do not persist the rank.
1260 the revlog which do not persist the rank.
1265 """
1261 """
1266 rank = self.index[rev][ENTRY_RANK]
1262 rank = self.index[rev][ENTRY_RANK]
1267 if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
1263 if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
1268 return None
1264 return None
1269 if rev == nullrev:
1265 if rev == nullrev:
1270 return 0 # convention
1266 return 0 # convention
1271 return rank
1267 return rank
1272
1268
1273 def chainbase(self, rev):
1269 def chainbase(self, rev):
1274 base = self._chainbasecache.get(rev)
1270 base = self._chainbasecache.get(rev)
1275 if base is not None:
1271 if base is not None:
1276 return base
1272 return base
1277
1273
1278 index = self.index
1274 index = self.index
1279 iterrev = rev
1275 iterrev = rev
1280 base = index[iterrev][3]
1276 base = index[iterrev][3]
1281 while base != iterrev:
1277 while base != iterrev:
1282 iterrev = base
1278 iterrev = base
1283 base = index[iterrev][3]
1279 base = index[iterrev][3]
1284
1280
1285 self._chainbasecache[rev] = base
1281 self._chainbasecache[rev] = base
1286 return base
1282 return base
1287
1283
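Editor's note: chainbase() is a pointer chase over index slot 3 with a small cache bolted on. The chase itself reduces to this, with slot 3 modeled as a plain list of toy data:

    def chainbase(bases, rev):
        # follow delta bases until a rev is its own base (a snapshot)
        while bases[rev] != rev:
            rev = bases[rev]
        return rev

    bases = [0, 0, 1, 3, 3]   # revs 0 and 3 are full snapshots
    assert chainbase(bases, 2) == 0
    assert chainbase(bases, 4) == 3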
1288 def linkrev(self, rev):
1284 def linkrev(self, rev):
1289 return self.index[rev][4]
1285 return self.index[rev][4]
1290
1286
1291 def parentrevs(self, rev):
1287 def parentrevs(self, rev):
1292 try:
1288 try:
1293 entry = self.index[rev]
1289 entry = self.index[rev]
1294 except IndexError:
1290 except IndexError:
1295 if rev == wdirrev:
1291 if rev == wdirrev:
1296 raise error.WdirUnsupported
1292 raise error.WdirUnsupported
1297 raise
1293 raise
1298
1294
1299 if self.feature_config.canonical_parent_order and entry[5] == nullrev:
1295 if self.feature_config.canonical_parent_order and entry[5] == nullrev:
1300 return entry[6], entry[5]
1296 return entry[6], entry[5]
1301 else:
1297 else:
1302 return entry[5], entry[6]
1298 return entry[5], entry[6]
1303
1299
1304 # fast parentrevs(rev) where rev isn't filtered
1300 # fast parentrevs(rev) where rev isn't filtered
1305 _uncheckedparentrevs = parentrevs
1301 _uncheckedparentrevs = parentrevs
1306
1302
1307 def node(self, rev):
1303 def node(self, rev):
1308 try:
1304 try:
1309 return self.index[rev][7]
1305 return self.index[rev][7]
1310 except IndexError:
1306 except IndexError:
1311 if rev == wdirrev:
1307 if rev == wdirrev:
1312 raise error.WdirUnsupported
1308 raise error.WdirUnsupported
1313 raise
1309 raise
1314
1310
1315 # Derived from index values.
1311 # Derived from index values.
1316
1312
1317 def end(self, rev):
1313 def end(self, rev):
1318 return self.start(rev) + self.length(rev)
1314 return self.start(rev) + self.length(rev)
1319
1315
1320 def parents(self, node):
1316 def parents(self, node):
1321 i = self.index
1317 i = self.index
1322 d = i[self.rev(node)]
1318 d = i[self.rev(node)]
1323 # inline node() to avoid function call overhead
1319 # inline node() to avoid function call overhead
1324 if self.feature_config.canonical_parent_order and d[5] == self.nullid:
1320 if self.feature_config.canonical_parent_order and d[5] == self.nullid:
1325 return i[d[6]][7], i[d[5]][7]
1321 return i[d[6]][7], i[d[5]][7]
1326 else:
1322 else:
1327 return i[d[5]][7], i[d[6]][7]
1323 return i[d[5]][7], i[d[6]][7]
1328
1324
1329 def chainlen(self, rev):
1325 def chainlen(self, rev):
1330 return self._chaininfo(rev)[0]
1326 return self._chaininfo(rev)[0]
1331
1327
1332 def _chaininfo(self, rev):
1328 def _chaininfo(self, rev):
1333 chaininfocache = self._chaininfocache
1329 chaininfocache = self._chaininfocache
1334 if rev in chaininfocache:
1330 if rev in chaininfocache:
1335 return chaininfocache[rev]
1331 return chaininfocache[rev]
1336 index = self.index
1332 index = self.index
1337 generaldelta = self.delta_config.general_delta
1333 generaldelta = self.delta_config.general_delta
1338 iterrev = rev
1334 iterrev = rev
1339 e = index[iterrev]
1335 e = index[iterrev]
1340 clen = 0
1336 clen = 0
1341 compresseddeltalen = 0
1337 compresseddeltalen = 0
1342 while iterrev != e[3]:
1338 while iterrev != e[3]:
1343 clen += 1
1339 clen += 1
1344 compresseddeltalen += e[1]
1340 compresseddeltalen += e[1]
1345 if generaldelta:
1341 if generaldelta:
1346 iterrev = e[3]
1342 iterrev = e[3]
1347 else:
1343 else:
1348 iterrev -= 1
1344 iterrev -= 1
1349 if iterrev in chaininfocache:
1345 if iterrev in chaininfocache:
1350 t = chaininfocache[iterrev]
1346 t = chaininfocache[iterrev]
1351 clen += t[0]
1347 clen += t[0]
1352 compresseddeltalen += t[1]
1348 compresseddeltalen += t[1]
1353 break
1349 break
1354 e = index[iterrev]
1350 e = index[iterrev]
1355 else:
1351 else:
1356 # Add text length of base since decompressing that also takes
1352 # Add text length of base since decompressing that also takes
1357 # work. For cache hits the length is already included.
1353 # work. For cache hits the length is already included.
1358 compresseddeltalen += e[1]
1354 compresseddeltalen += e[1]
1359 r = (clen, compresseddeltalen)
1355 r = (clen, compresseddeltalen)
1360 chaininfocache[rev] = r
1356 chaininfocache[rev] = r
1361 return r
1357 return r
1362
1358
1363 def _deltachain(self, rev, stoprev=None):
1359 def _deltachain(self, rev, stoprev=None):
1364 """Obtain the delta chain for a revision.
1360 """Obtain the delta chain for a revision.
1365
1361
1366 ``stoprev`` specifies a revision to stop at. If not specified, we
1362 ``stoprev`` specifies a revision to stop at. If not specified, we
1367 stop at the base of the chain.
1363 stop at the base of the chain.
1368
1364
1369 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
1365 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
1370 revs in ascending order and ``stopped`` is a bool indicating whether
1366 revs in ascending order and ``stopped`` is a bool indicating whether
1371 ``stoprev`` was hit.
1367 ``stoprev`` was hit.
1372 """
1368 """
1373 generaldelta = self.delta_config.general_delta
1369 generaldelta = self.delta_config.general_delta
1374 # Try C implementation.
1370 # Try C implementation.
1375 try:
1371 try:
1376 return self.index.deltachain(rev, stoprev, generaldelta)
1372 return self.index.deltachain(rev, stoprev, generaldelta)
1377 except AttributeError:
1373 except AttributeError:
1378 pass
1374 pass
1379
1375
1380 chain = []
1376 chain = []
1381
1377
1382 # Alias to prevent attribute lookup in tight loop.
1378 # Alias to prevent attribute lookup in tight loop.
1383 index = self.index
1379 index = self.index
1384
1380
1385 iterrev = rev
1381 iterrev = rev
1386 e = index[iterrev]
1382 e = index[iterrev]
1387 while iterrev != e[3] and iterrev != stoprev:
1383 while iterrev != e[3] and iterrev != stoprev:
1388 chain.append(iterrev)
1384 chain.append(iterrev)
1389 if generaldelta:
1385 if generaldelta:
1390 iterrev = e[3]
1386 iterrev = e[3]
1391 else:
1387 else:
1392 iterrev -= 1
1388 iterrev -= 1
1393 e = index[iterrev]
1389 e = index[iterrev]
1394
1390
1395 if iterrev == stoprev:
1391 if iterrev == stoprev:
1396 stopped = True
1392 stopped = True
1397 else:
1393 else:
1398 chain.append(iterrev)
1394 chain.append(iterrev)
1399 stopped = False
1395 stopped = False
1400
1396
1401 chain.reverse()
1397 chain.reverse()
1402 return chain, stopped
1398 return chain, stopped
1403
1399
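Editor's note: a standalone version of _deltachain's pure-Python fallback loop above, for the generaldelta case, with a toy base list standing in for index slot 3:

    def deltachain(bases, rev, stoprev=None):
        chain = []
        while bases[rev] != rev and rev != stoprev:
            chain.append(rev)
            rev = bases[rev]     # generaldelta: jump to the base
        stopped = rev == stoprev
        if not stopped:
            chain.append(rev)    # include the snapshot itself
        chain.reverse()          # ascending order, as documented
        return chain, stopped

    bases = [0, 0, 1, 2]
    assert deltachain(bases, 3) == ([0, 1, 2, 3], False)
    assert deltachain(bases, 3, stoprev=1) == ([2, 3], True)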
1404 def ancestors(self, revs, stoprev=0, inclusive=False):
1400 def ancestors(self, revs, stoprev=0, inclusive=False):
1405 """Generate the ancestors of 'revs' in reverse revision order.
1401 """Generate the ancestors of 'revs' in reverse revision order.
1406 Does not generate revs lower than stoprev.
1402 Does not generate revs lower than stoprev.
1407
1403
1408 See the documentation for ancestor.lazyancestors for more details."""
1404 See the documentation for ancestor.lazyancestors for more details."""
1409
1405
1410 # first, make sure start revisions aren't filtered
1406 # first, make sure start revisions aren't filtered
1411 revs = list(revs)
1407 revs = list(revs)
1412 checkrev = self.node
1408 checkrev = self.node
1413 for r in revs:
1409 for r in revs:
1414 checkrev(r)
1410 checkrev(r)
1415 # and we're sure ancestors aren't filtered as well
1411 # and we're sure ancestors aren't filtered as well
1416
1412
1417 if rustancestor is not None and self.index.rust_ext_compat:
1413 if rustancestor is not None and self.index.rust_ext_compat:
1418 lazyancestors = rustancestor.LazyAncestors
1414 lazyancestors = rustancestor.LazyAncestors
1419 arg = self.index
1415 arg = self.index
1420 else:
1416 else:
1421 lazyancestors = ancestor.lazyancestors
1417 lazyancestors = ancestor.lazyancestors
1422 arg = self._uncheckedparentrevs
1418 arg = self._uncheckedparentrevs
1423 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
1419 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
1424
1420
1425 def descendants(self, revs):
1421 def descendants(self, revs):
1426 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
1422 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
1427
1423
1428 def findcommonmissing(self, common=None, heads=None):
1424 def findcommonmissing(self, common=None, heads=None):
1429 """Return a tuple of the ancestors of common and the ancestors of heads
1425 """Return a tuple of the ancestors of common and the ancestors of heads
1430 that are not ancestors of common. In revset terminology, we return the
1426 that are not ancestors of common. In revset terminology, we return the
1431 tuple:
1427 tuple:
1432
1428
1433 ::common, (::heads) - (::common)
1429 ::common, (::heads) - (::common)
1434
1430
1435 The list is sorted by revision number, meaning it is
1431 The list is sorted by revision number, meaning it is
1436 topologically sorted.
1432 topologically sorted.
1437
1433
1438 'heads' and 'common' are both lists of node IDs. If heads is
1434 'heads' and 'common' are both lists of node IDs. If heads is
1439 not supplied, uses all of the revlog's heads. If common is not
1435 not supplied, uses all of the revlog's heads. If common is not
1440 supplied, uses nullid."""
1436 supplied, uses nullid."""
1441 if common is None:
1437 if common is None:
1442 common = [self.nullid]
1438 common = [self.nullid]
1443 if heads is None:
1439 if heads is None:
1444 heads = self.heads()
1440 heads = self.heads()
1445
1441
1446 common = [self.rev(n) for n in common]
1442 common = [self.rev(n) for n in common]
1447 heads = [self.rev(n) for n in heads]
1443 heads = [self.rev(n) for n in heads]
1448
1444
1449 # we want the ancestors, but inclusive
1445 # we want the ancestors, but inclusive
1450 class lazyset:
1446 class lazyset:
1451 def __init__(self, lazyvalues):
1447 def __init__(self, lazyvalues):
1452 self.addedvalues = set()
1448 self.addedvalues = set()
1453 self.lazyvalues = lazyvalues
1449 self.lazyvalues = lazyvalues
1454
1450
1455 def __contains__(self, value):
1451 def __contains__(self, value):
1456 return value in self.addedvalues or value in self.lazyvalues
1452 return value in self.addedvalues or value in self.lazyvalues
1457
1453
1458 def __iter__(self):
1454 def __iter__(self):
1459 added = self.addedvalues
1455 added = self.addedvalues
1460 for r in added:
1456 for r in added:
1461 yield r
1457 yield r
1462 for r in self.lazyvalues:
1458 for r in self.lazyvalues:
1463 if r not in added:
1459 if r not in added:
1464 yield r
1460 yield r
1465
1461
1466 def add(self, value):
1462 def add(self, value):
1467 self.addedvalues.add(value)
1463 self.addedvalues.add(value)
1468
1464
1469 def update(self, values):
1465 def update(self, values):
1470 self.addedvalues.update(values)
1466 self.addedvalues.update(values)
1471
1467
1472 has = lazyset(self.ancestors(common))
1468 has = lazyset(self.ancestors(common))
1473 has.add(nullrev)
1469 has.add(nullrev)
1474 has.update(common)
1470 has.update(common)
1475
1471
1476 # take all ancestors from heads that aren't in has
1472 # take all ancestors from heads that aren't in has
1477 missing = set()
1473 missing = set()
1478 visit = collections.deque(r for r in heads if r not in has)
1474 visit = collections.deque(r for r in heads if r not in has)
1479 while visit:
1475 while visit:
1480 r = visit.popleft()
1476 r = visit.popleft()
1481 if r in missing:
1477 if r in missing:
1482 continue
1478 continue
1483 else:
1479 else:
1484 missing.add(r)
1480 missing.add(r)
1485 for p in self.parentrevs(r):
1481 for p in self.parentrevs(r):
1486 if p not in has:
1482 if p not in has:
1487 visit.append(p)
1483 visit.append(p)
1488 missing = list(missing)
1484 missing = list(missing)
1489 missing.sort()
1485 missing.sort()
1490 return has, [self.node(miss) for miss in missing]
1486 return has, [self.node(miss) for miss in missing]
1491
1487
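Editor's note: the second half of findcommonmissing is a plain BFS over parent pointers, pruned by the `has` set. The same walk over a toy DAG (a sketch, not the real revlog types):

    import collections

    def missingancestors(parents, has, heads):
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            missing.add(r)
            for p in parents[r]:
                if p not in has:
                    visit.append(p)
        return sorted(missing)

    parents = {0: [], 1: [0], 2: [0], 3: [1]}
    assert missingancestors(parents, has={0, 1}, heads=[2, 3]) == [2, 3]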
1492 def incrementalmissingrevs(self, common=None):
1488 def incrementalmissingrevs(self, common=None):
1493 """Return an object that can be used to incrementally compute the
1489 """Return an object that can be used to incrementally compute the
1494 revision numbers of the ancestors of arbitrary sets that are not
1490 revision numbers of the ancestors of arbitrary sets that are not
1495 ancestors of common. This is an ancestor.incrementalmissingancestors
1491 ancestors of common. This is an ancestor.incrementalmissingancestors
1496 object.
1492 object.
1497
1493
1498 'common' is a list of revision numbers. If common is not supplied, uses
1494 'common' is a list of revision numbers. If common is not supplied, uses
1499 nullrev.
1495 nullrev.
1500 """
1496 """
1501 if common is None:
1497 if common is None:
1502 common = [nullrev]
1498 common = [nullrev]
1503
1499
1504 if rustancestor is not None and self.index.rust_ext_compat:
1500 if rustancestor is not None and self.index.rust_ext_compat:
1505 return rustancestor.MissingAncestors(self.index, common)
1501 return rustancestor.MissingAncestors(self.index, common)
1506 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1502 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1507
1503
1508 def findmissingrevs(self, common=None, heads=None):
1504 def findmissingrevs(self, common=None, heads=None):
1509 """Return the revision numbers of the ancestors of heads that
1505 """Return the revision numbers of the ancestors of heads that
1510 are not ancestors of common.
1506 are not ancestors of common.
1511
1507
1512 More specifically, return a list of revision numbers corresponding to
1508 More specifically, return a list of revision numbers corresponding to
1513 nodes N such that every N satisfies the following constraints:
1509 nodes N such that every N satisfies the following constraints:
1514
1510
1515 1. N is an ancestor of some node in 'heads'
1511 1. N is an ancestor of some node in 'heads'
1516 2. N is not an ancestor of any node in 'common'
1512 2. N is not an ancestor of any node in 'common'
1517
1513
1518 The list is sorted by revision number, meaning it is
1514 The list is sorted by revision number, meaning it is
1519 topologically sorted.
1515 topologically sorted.
1520
1516
1521 'heads' and 'common' are both lists of revision numbers. If heads is
1517 'heads' and 'common' are both lists of revision numbers. If heads is
1522 not supplied, uses all of the revlog's heads. If common is not
1518 not supplied, uses all of the revlog's heads. If common is not
1523 supplied, uses nullid."""
1519 supplied, uses nullid."""
1524 if common is None:
1520 if common is None:
1525 common = [nullrev]
1521 common = [nullrev]
1526 if heads is None:
1522 if heads is None:
1527 heads = self.headrevs()
1523 heads = self.headrevs()
1528
1524
1529 inc = self.incrementalmissingrevs(common=common)
1525 inc = self.incrementalmissingrevs(common=common)
1530 return inc.missingancestors(heads)
1526 return inc.missingancestors(heads)
1531
1527
1532 def findmissing(self, common=None, heads=None):
1528 def findmissing(self, common=None, heads=None):
1533 """Return the ancestors of heads that are not ancestors of common.
1529 """Return the ancestors of heads that are not ancestors of common.
1534
1530
1535 More specifically, return a list of nodes N such that every N
1531 More specifically, return a list of nodes N such that every N
1536 satisfies the following constraints:
1532 satisfies the following constraints:
1537
1533
1538 1. N is an ancestor of some node in 'heads'
1534 1. N is an ancestor of some node in 'heads'
1539 2. N is not an ancestor of any node in 'common'
1535 2. N is not an ancestor of any node in 'common'
1540
1536
1541 The list is sorted by revision number, meaning it is
1537 The list is sorted by revision number, meaning it is
1542 topologically sorted.
1538 topologically sorted.
1543
1539
1544 'heads' and 'common' are both lists of node IDs. If heads is
1540 'heads' and 'common' are both lists of node IDs. If heads is
1545 not supplied, uses all of the revlog's heads. If common is not
1541 not supplied, uses all of the revlog's heads. If common is not
1546 supplied, uses nullid."""
1542 supplied, uses nullid."""
1547 if common is None:
1543 if common is None:
1548 common = [self.nullid]
1544 common = [self.nullid]
1549 if heads is None:
1545 if heads is None:
1550 heads = self.heads()
1546 heads = self.heads()
1551
1547
1552 common = [self.rev(n) for n in common]
1548 common = [self.rev(n) for n in common]
1553 heads = [self.rev(n) for n in heads]
1549 heads = [self.rev(n) for n in heads]
1554
1550
1555 inc = self.incrementalmissingrevs(common=common)
1551 inc = self.incrementalmissingrevs(common=common)
1556 return [self.node(r) for r in inc.missingancestors(heads)]
1552 return [self.node(r) for r in inc.missingancestors(heads)]
1557
1553
1558 def nodesbetween(self, roots=None, heads=None):
1554 def nodesbetween(self, roots=None, heads=None):
1559 """Return a topological path from 'roots' to 'heads'.
1555 """Return a topological path from 'roots' to 'heads'.
1560
1556
1561 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1557 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1562 topologically sorted list of all nodes N that satisfy both of
1558 topologically sorted list of all nodes N that satisfy both of
1563 these constraints:
1559 these constraints:
1564
1560
1565 1. N is a descendant of some node in 'roots'
1561 1. N is a descendant of some node in 'roots'
1566 2. N is an ancestor of some node in 'heads'
1562 2. N is an ancestor of some node in 'heads'
1567
1563
1568 Every node is considered to be both a descendant and an ancestor
1564 Every node is considered to be both a descendant and an ancestor
1569 of itself, so every reachable node in 'roots' and 'heads' will be
1565 of itself, so every reachable node in 'roots' and 'heads' will be
1570 included in 'nodes'.
1566 included in 'nodes'.
1571
1567
1572 'outroots' is the list of reachable nodes in 'roots', i.e., the
1568 'outroots' is the list of reachable nodes in 'roots', i.e., the
1573 subset of 'roots' that is returned in 'nodes'. Likewise,
1569 subset of 'roots' that is returned in 'nodes'. Likewise,
1574 'outheads' is the subset of 'heads' that is also in 'nodes'.
1570 'outheads' is the subset of 'heads' that is also in 'nodes'.
1575
1571
1576 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1572 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1577 unspecified, uses nullid as the only root. If 'heads' is
1573 unspecified, uses nullid as the only root. If 'heads' is
1578 unspecified, uses list of all of the revlog's heads."""
1574 unspecified, uses list of all of the revlog's heads."""
1579 nonodes = ([], [], [])
1575 nonodes = ([], [], [])
1580 if roots is not None:
1576 if roots is not None:
1581 roots = list(roots)
1577 roots = list(roots)
1582 if not roots:
1578 if not roots:
1583 return nonodes
1579 return nonodes
1584 lowestrev = min([self.rev(n) for n in roots])
1580 lowestrev = min([self.rev(n) for n in roots])
1585 else:
1581 else:
1586 roots = [self.nullid] # Everybody's a descendant of nullid
1582 roots = [self.nullid] # Everybody's a descendant of nullid
1587 lowestrev = nullrev
1583 lowestrev = nullrev
1588 if (lowestrev == nullrev) and (heads is None):
1584 if (lowestrev == nullrev) and (heads is None):
1589 # We want _all_ the nodes!
1585 # We want _all_ the nodes!
1590 return (
1586 return (
1591 [self.node(r) for r in self],
1587 [self.node(r) for r in self],
1592 [self.nullid],
1588 [self.nullid],
1593 list(self.heads()),
1589 list(self.heads()),
1594 )
1590 )
1595 if heads is None:
1591 if heads is None:
1596 # All nodes are ancestors, so the latest ancestor is the last
1592 # All nodes are ancestors, so the latest ancestor is the last
1597 # node.
1593 # node.
1598 highestrev = len(self) - 1
1594 highestrev = len(self) - 1
1599 # Set ancestors to None to signal that every node is an ancestor.
1595 # Set ancestors to None to signal that every node is an ancestor.
1600 ancestors = None
1596 ancestors = None
1601 # Set heads to an empty dictionary for later discovery of heads
1597 # Set heads to an empty dictionary for later discovery of heads
1602 heads = {}
1598 heads = {}
1603 else:
1599 else:
1604 heads = list(heads)
1600 heads = list(heads)
1605 if not heads:
1601 if not heads:
1606 return nonodes
1602 return nonodes
1607 ancestors = set()
1603 ancestors = set()
1608 # Turn heads into a dictionary so we can remove 'fake' heads.
1604 # Turn heads into a dictionary so we can remove 'fake' heads.
1609 # Also, later we will be using it to filter out the heads we can't
1605 # Also, later we will be using it to filter out the heads we can't
1610 # find from roots.
1606 # find from roots.
1611 heads = dict.fromkeys(heads, False)
1607 heads = dict.fromkeys(heads, False)
1612 # Start at the top and keep marking parents until we're done.
1608 # Start at the top and keep marking parents until we're done.
1613 nodestotag = set(heads)
1609 nodestotag = set(heads)
1614 # Remember where the top was so we can use it as a limit later.
1610 # Remember where the top was so we can use it as a limit later.
1615 highestrev = max([self.rev(n) for n in nodestotag])
1611 highestrev = max([self.rev(n) for n in nodestotag])
1616 while nodestotag:
1612 while nodestotag:
1617 # grab a node to tag
1613 # grab a node to tag
1618 n = nodestotag.pop()
1614 n = nodestotag.pop()
1619 # Never tag nullid
1615 # Never tag nullid
1620 if n == self.nullid:
1616 if n == self.nullid:
1621 continue
1617 continue
1622 # A node's revision number represents its place in a
1618 # A node's revision number represents its place in a
1623 # topologically sorted list of nodes.
1619 # topologically sorted list of nodes.
1624 r = self.rev(n)
1620 r = self.rev(n)
1625 if r >= lowestrev:
1621 if r >= lowestrev:
1626 if n not in ancestors:
1622 if n not in ancestors:
1627 # If we are possibly a descendant of one of the roots
1623 # If we are possibly a descendant of one of the roots
1628 # and we haven't already been marked as an ancestor
1624 # and we haven't already been marked as an ancestor
1629 ancestors.add(n) # Mark as ancestor
1625 ancestors.add(n) # Mark as ancestor
1630 # Add non-nullid parents to list of nodes to tag.
1626 # Add non-nullid parents to list of nodes to tag.
1631 nodestotag.update(
1627 nodestotag.update(
1632 [p for p in self.parents(n) if p != self.nullid]
1628 [p for p in self.parents(n) if p != self.nullid]
1633 )
1629 )
1634 elif n in heads: # We've seen it before, is it a fake head?
1630 elif n in heads: # We've seen it before, is it a fake head?
1635 # So it is; real heads should not be the ancestors of
1631 # So it is; real heads should not be the ancestors of
1636 # any other heads.
1632 # any other heads.
1637 heads.pop(n)
1633 heads.pop(n)
1638 if not ancestors:
1634 if not ancestors:
1639 return nonodes
1635 return nonodes
1640 # Now that we have our set of ancestors, we want to remove any
1636 # Now that we have our set of ancestors, we want to remove any
1641 # roots that are not ancestors.
1637 # roots that are not ancestors.
1642
1638
1643 # If one of the roots was nullid, everything is included anyway.
1639 # If one of the roots was nullid, everything is included anyway.
1644 if lowestrev > nullrev:
1640 if lowestrev > nullrev:
1645 # But, since we weren't, let's recompute the lowest rev to not
1641 # But, since we weren't, let's recompute the lowest rev to not
1646 # include roots that aren't ancestors.
1642 # include roots that aren't ancestors.
1647
1643
1648 # Filter out roots that aren't ancestors of heads
1644 # Filter out roots that aren't ancestors of heads
1649 roots = [root for root in roots if root in ancestors]
1645 roots = [root for root in roots if root in ancestors]
1650 # Recompute the lowest revision
1646 # Recompute the lowest revision
1651 if roots:
1647 if roots:
1652 lowestrev = min([self.rev(root) for root in roots])
1648 lowestrev = min([self.rev(root) for root in roots])
1653 else:
1649 else:
1654 # No more roots? Return empty list
1650 # No more roots? Return empty list
1655 return nonodes
1651 return nonodes
1656 else:
1652 else:
1657 # We are descending from nullid, and don't need to care about
1653 # We are descending from nullid, and don't need to care about
1658 # any other roots.
1654 # any other roots.
1659 lowestrev = nullrev
1655 lowestrev = nullrev
1660 roots = [self.nullid]
1656 roots = [self.nullid]
1661 # Transform our roots list into a set.
1657 # Transform our roots list into a set.
1662 descendants = set(roots)
1658 descendants = set(roots)
1663 # Also, keep the original roots so we can filter out roots that aren't
1659 # Also, keep the original roots so we can filter out roots that aren't
1664 # 'real' roots (i.e. are descended from other roots).
1660 # 'real' roots (i.e. are descended from other roots).
1665 roots = descendants.copy()
1661 roots = descendants.copy()
1666 # Our topologically sorted list of output nodes.
1662 # Our topologically sorted list of output nodes.
1667 orderedout = []
1663 orderedout = []
1668 # Don't start at nullid since we don't want nullid in our output list,
1664 # Don't start at nullid since we don't want nullid in our output list,
1669 # and if nullid shows up in descendants, empty parents will look like
1665 # and if nullid shows up in descendants, empty parents will look like
1670 # they're descendants.
1666 # they're descendants.
1671 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1667 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1672 n = self.node(r)
1668 n = self.node(r)
1673 isdescendant = False
1669 isdescendant = False
1674 if lowestrev == nullrev: # Everybody is a descendant of nullid
1670 if lowestrev == nullrev: # Everybody is a descendant of nullid
1675 isdescendant = True
1671 isdescendant = True
1676 elif n in descendants:
1672 elif n in descendants:
1677 # n is already a descendant
1673 # n is already a descendant
1678 isdescendant = True
1674 isdescendant = True
1679 # This check only needs to be done here because all the roots
1675 # This check only needs to be done here because all the roots
1680 # will start being marked as descendants before the loop.
1676 # will start being marked as descendants before the loop.
1681 if n in roots:
1677 if n in roots:
1682 # If n was a root, check if it's a 'real' root.
1678 # If n was a root, check if it's a 'real' root.
1683 p = tuple(self.parents(n))
1679 p = tuple(self.parents(n))
1684 # If any of its parents are descendants, it's not a root.
1680 # If any of its parents are descendants, it's not a root.
1685 if (p[0] in descendants) or (p[1] in descendants):
1681 if (p[0] in descendants) or (p[1] in descendants):
1686 roots.remove(n)
1682 roots.remove(n)
1687 else:
1683 else:
1688 p = tuple(self.parents(n))
1684 p = tuple(self.parents(n))
1689 # A node is a descendant if either of its parents are
1685 # A node is a descendant if either of its parents are
1690 # descendants. (We seeded the descendants set with the roots
1686 # descendants. (We seeded the descendants set with the roots
1691 # up there, remember?)
1687 # up there, remember?)
1692 if (p[0] in descendants) or (p[1] in descendants):
1688 if (p[0] in descendants) or (p[1] in descendants):
1693 descendants.add(n)
1689 descendants.add(n)
1694 isdescendant = True
1690 isdescendant = True
1695 if isdescendant and ((ancestors is None) or (n in ancestors)):
1691 if isdescendant and ((ancestors is None) or (n in ancestors)):
1696 # Only include nodes that are both descendants and ancestors.
1692 # Only include nodes that are both descendants and ancestors.
1697 orderedout.append(n)
1693 orderedout.append(n)
1698 if (ancestors is not None) and (n in heads):
1694 if (ancestors is not None) and (n in heads):
1699 # We're trying to figure out which heads are reachable
1695 # We're trying to figure out which heads are reachable
1700 # from roots.
1696 # from roots.
1701 # Mark this head as having been reached
1697 # Mark this head as having been reached
1702 heads[n] = True
1698 heads[n] = True
1703 elif ancestors is None:
1699 elif ancestors is None:
1704 # Otherwise, we're trying to discover the heads.
1700 # Otherwise, we're trying to discover the heads.
1705 # Assume this is a head because if it isn't, the next step
1701 # Assume this is a head because if it isn't, the next step
1706 # will eventually remove it.
1702 # will eventually remove it.
1707 heads[n] = True
1703 heads[n] = True
1708 # But, obviously its parents aren't.
1704 # But, obviously its parents aren't.
1709 for p in self.parents(n):
1705 for p in self.parents(n):
1710 heads.pop(p, None)
1706 heads.pop(p, None)
1711 heads = [head for head, flag in heads.items() if flag]
1707 heads = [head for head, flag in heads.items() if flag]
1712 roots = list(roots)
1708 roots = list(roots)
1713 assert orderedout
1709 assert orderedout
1714 assert roots
1710 assert roots
1715 assert heads
1711 assert heads
1716 return (orderedout, roots, heads)
1712 return (orderedout, roots, heads)
1717
1713
1718 def headrevs(self, revs=None):
1714 def headrevs(self, revs=None):
1719 if revs is None:
1715 if revs is None:
1720 try:
1716 try:
1721 return self.index.headrevs()
1717 return self.index.headrevs()
1722 except AttributeError:
1718 except AttributeError:
1723 return self._headrevs()
1719 return self._headrevs()
1724 if rustdagop is not None and self.index.rust_ext_compat:
1720 if rustdagop is not None and self.index.rust_ext_compat:
1725 return rustdagop.headrevs(self.index, revs)
1721 return rustdagop.headrevs(self.index, revs)
1726 return dagop.headrevs(revs, self._uncheckedparentrevs)
1722 return dagop.headrevs(revs, self._uncheckedparentrevs)
1727
1723
1728 def computephases(self, roots):
1724 def computephases(self, roots):
1729 return self.index.computephasesmapsets(roots)
1725 return self.index.computephasesmapsets(roots)
1730
1726
1731 def _headrevs(self):
1727 def _headrevs(self):
1732 count = len(self)
1728 count = len(self)
1733 if not count:
1729 if not count:
1734 return [nullrev]
1730 return [nullrev]
1735 # we won't iter over filtered revs so nobody is a head at start
1731 # we won't iter over filtered revs so nobody is a head at start
1736 ishead = [0] * (count + 1)
1732 ishead = [0] * (count + 1)
1737 index = self.index
1733 index = self.index
1738 for r in self:
1734 for r in self:
1739 ishead[r] = 1 # I may be a head
1735 ishead[r] = 1 # I may be a head
1740 e = index[r]
1736 e = index[r]
1741 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1737 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1742 return [r for r, val in enumerate(ishead) if val]
1738 return [r for r, val in enumerate(ishead) if val]
1743
1739
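Editor's note: the knock-out scheme of _headrevs, restated over a toy parent table. Every rev starts as a candidate head and loses that status as soon as some child names it as a parent (-1 stands in for nullrev):

    def headrevs(parentrevs):
        ishead = [True] * len(parentrevs)
        for r, (p1, p2) in enumerate(parentrevs):
            for p in (p1, p2):
                if p >= 0:
                    ishead[p] = False  # a parent cannot be a head
        return [r for r, flag in enumerate(ishead) if flag]

    parentrevs = [(-1, -1), (0, -1), (0, -1), (2, 1)]
    assert headrevs(parentrevs) == [3]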
1744 def heads(self, start=None, stop=None):
1740 def heads(self, start=None, stop=None):
1745 """return the list of all nodes that have no children
1741 """return the list of all nodes that have no children
1746
1742
1747 if start is specified, only heads that are descendants of
1743 if start is specified, only heads that are descendants of
1748 start will be returned
1744 start will be returned
1749 if stop is specified, it will consider all the revs from stop
1745 if stop is specified, it will consider all the revs from stop
1750 as if they had no children
1746 as if they had no children
1751 """
1747 """
1752 if start is None and stop is None:
1748 if start is None and stop is None:
1753 if not len(self):
1749 if not len(self):
1754 return [self.nullid]
1750 return [self.nullid]
1755 return [self.node(r) for r in self.headrevs()]
1751 return [self.node(r) for r in self.headrevs()]
1756
1752
1757 if start is None:
1753 if start is None:
1758 start = nullrev
1754 start = nullrev
1759 else:
1755 else:
1760 start = self.rev(start)
1756 start = self.rev(start)
1761
1757
1762 stoprevs = {self.rev(n) for n in stop or []}
1758 stoprevs = {self.rev(n) for n in stop or []}
1763
1759
1764 revs = dagop.headrevssubset(
1760 revs = dagop.headrevssubset(
1765 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1761 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1766 )
1762 )
1767
1763
1768 return [self.node(rev) for rev in revs]
1764 return [self.node(rev) for rev in revs]
1769
1765
1770 def children(self, node):
1766 def children(self, node):
1771 """find the children of a given node"""
1767 """find the children of a given node"""
1772 c = []
1768 c = []
1773 p = self.rev(node)
1769 p = self.rev(node)
1774 for r in self.revs(start=p + 1):
1770 for r in self.revs(start=p + 1):
1775 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1771 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1776 if prevs:
1772 if prevs:
1777 for pr in prevs:
1773 for pr in prevs:
1778 if pr == p:
1774 if pr == p:
1779 c.append(self.node(r))
1775 c.append(self.node(r))
1780 elif p == nullrev:
1776 elif p == nullrev:
1781 c.append(self.node(r))
1777 c.append(self.node(r))
1782 return c
1778 return c
1783
1779
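Editor's note: children() has no reverse index to consult, so it scans every rev above `p` and keeps those that list `p` as a parent. The same scan in miniature:

    def children(parentrevs, p):
        # only revs numbered above p can be its children
        return [r for r in range(p + 1, len(parentrevs))
                if p in parentrevs[r]]

    parentrevs = [(-1, -1), (0, -1), (0, 1)]
    assert children(parentrevs, 0) == [1, 2]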
    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError):  # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        reachableroots is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return bool(self.reachableroots(a, [b], [a], includepath=False))

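    # Ordering sketch (illustrative): revision numbers never decrease along
    # parent links, so ``a > b`` rules out the ancestor relation without a
    # graph walk:
    #
    #   rl.isancestorrev(2, 5)  # may require a reachableroots traversal
    #   rl.isancestorrev(5, 2)  # False immediately, since 5 > 2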
    def reachableroots(self, minroot, heads, roots, includepath=False):
        """return (heads(::(<roots> and <roots>::<heads>)))

        If includepath is True, return (<roots>::<heads>)."""
        try:
            return self.index.reachableroots2(
                minroot, heads, roots, includepath
            )
        except AttributeError:
            return dagop._reachablerootspure(
                self.parentrevs, minroot, roots, heads, includepath
            )

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return self.nullid

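    # Usage sketch (illustrative): ``ancestor`` maps nodes to revisions,
    # asks the index (or the pure-Python fallback) for the candidate set,
    # and picks a deterministic winner:
    #
    #   best = rl.ancestor(node_a, node_b)  # rl.nullid if unrelated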
    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == self.nodeconstants.nodelen:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node)  # quick search the index
                return node
            except error.LookupError:
                pass  # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if b"%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 2 * self.nodeconstants.nodelen:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (binascii.Error, error.LookupError):
                pass

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be full
        # hashes
        maybewdir = self.nodeconstants.wdirhex.startswith(id)
        ambiguous = False
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    ambiguous = True
                else:
                    return partial
            elif maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            else:
                return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                ambiguous = True
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key is not hex
            pass
        if ambiguous:
            raise error.AmbiguousPrefixLookupError(
                id, self.display_id, _(b'ambiguous identifier')
            )

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            # hex(node)[:...]
            l = len(id) // 2 * 2  # grab an even number of digits
            try:
                # we're dropping the last digit, so let's check that it's hex,
                # to avoid the expensive computation below if it's not
                if len(id) % 2 > 0:
                    if not (id[-1] in hexdigits):
                        return None
                prefix = bin(id[:l])
            except binascii.Error:
                pass
            else:
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [
                    n for n in nl if hex(n).startswith(id) and self.hasnode(n)
                ]
                if self.nodeconstants.nullhex.startswith(id):
                    nl.append(self.nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.display_id, _(b'ambiguous identifier')
                    )
                if maybewdir:
                    raise error.WdirUnsupported
                return None

    def lookup(self, id):
        """locate a node based on:
        - revision number or str(revision number)
        - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.display_id, _(b'no match found'))

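    # Accepted identifier forms (illustrative sketch):
    #
    #   rl.lookup(0)              # revision number
    #   rl.lookup(b'0')           # str(revision number)
    #   rl.lookup(node)           # 20-byte binary nodeid
    #   rl.lookup(hex(node))      # full hex nodeid
    #   rl.lookup(hex(node)[:8])  # unambiguous hex prefix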
    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""

        def isvalid(prefix):
            try:
                matchednode = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if matchednode is None:
                raise error.LookupError(node, self.display_id, _(b'no node'))
            return True

        def maybewdir(prefix):
            return all(c == b'f' for c in pycompat.iterbytestr(prefix))

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != self.nodeconstants.wdirid:
                    raise error.LookupError(
                        node, self.display_id, _(b'no node')
                    )
            except AttributeError:
                # Fall through to pure code
                pass

        if node == self.nodeconstants.wdirid:
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, len(hexnode) + 1):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

    def _getsegmentforrevs(self, startrev, endrev):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self.index.entry_size
            end += (endrev + 1) * self.index.entry_size
        length = end - start

        return start, self._segmentfile.read_chunk(start, length)

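    # Inline-offset sketch (illustrative): in an inline revlog, the data for
    # revision ``r`` sits behind r + 1 index entries (one per earlier
    # revision, plus the header entry), hence the adjustment above:
    #
    #   physical_start = logical_start + (r + 1) * index.entry_size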
    def _chunk(self, rev):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision number.

        Returns a str holding uncompressed data for the requested revision.
        """
        compression_mode = self.index[rev][10]
        data = self._getsegmentforrevs(rev, rev)[1]
        if compression_mode == COMP_MODE_PLAIN:
            return data
        elif compression_mode == COMP_MODE_DEFAULT:
            return self._decompressor(data)
        elif compression_mode == COMP_MODE_INLINE:
            return self.decompress(data)
        else:
            msg = b'unknown compression mode %d'
            msg %= compression_mode
            raise error.RevlogError(msg)

    def _chunks(self, revs, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self.index.entry_size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self.data_config.with_sparse_read:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(
                self, revs, targetsize=targetsize
            )

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev) for rev in revschunk]

            decomp = self.decompress
            # self._decompressor might be None, but will not be used in that case
            def_decomp = self._decompressor
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                comp_mode = self.index[rev][10]
                c = buffer(data, chunkstart - offset, chunklength)
                if comp_mode == COMP_MODE_PLAIN:
                    ladd(c)
                elif comp_mode == COMP_MODE_INLINE:
                    ladd(decomp(c))
                elif comp_mode == COMP_MODE_DEFAULT:
                    ladd(def_decomp(c))
                else:
                    msg = b'unknown compression mode %d'
                    msg %= comp_mode
                    raise error.RevlogError(msg)

        return l

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self.delta_config.general_delta:
            return base
        else:
            return rev - 1

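    # Delta-base sketch (illustrative): with general delta the stored base
    # field is authoritative; without it, deltas always chain against the
    # previous revision on disk:
    #
    #   base = rl.deltaparent(rev)
    #   # nullrev -> full snapshot; otherwise the revision this delta
    #   # applies against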
    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        if not self.delta_config.sparse_revlog:
            return self.deltaparent(rev) == nullrev
        elif hasattr(self.index, 'issnapshot'):
            # directly assign the method to cache both the test and the
            # access
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        while self.length(p1) == 0:
            b = self.deltaparent(p1)
            if b == p1:
                break
            p1 = b
        p2 = entry[6]
        while self.length(p2) == 0:
            b = self.deltaparent(p2)
            if b == p2:
                break
            p2 = b
        if base == p1 or base == p2:
            return False
        return self.issnapshot(base)

    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

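    # Depth sketch (illustrative): in a sparse revlog, the delta chain of a
    # snapshot only goes through other snapshots, so its depth is the chain
    # length with the revision itself excluded:
    #
    #   rl.snapshotdepth(rev)  # 0 for a full snapshot stored against null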
    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))

    def revision(self, nodeorrev):
        """return an uncompressed revision of a given node or revision
        number.
        """
        return self._revisiondata(nodeorrev)

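    # ``revision`` vs ``rawdata`` (illustrative sketch): both go through
    # ``_revisiondata``, but ``rawdata`` skips the read-side flag
    # processors, so the two can differ for e.g. censored revisions:
    #
    #   text = rl.revision(node)  # flag processors applied
    #   raw = rl.rawdata(node)    # bytes as stored, hash-checkable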
    def sidedata(self, nodeorrev):
        """a map of extra data related to the changeset but not part of the hash

        This function currently returns a dictionary. However, a more
        advanced mapping object will likely be used in the future for more
        efficient/lazy code.
        """
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
        else:
            rev = self.rev(nodeorrev)
        return self._sidedata(rev)

    def _revisiondata(self, nodeorrev, raw=False):
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        # fast path the special `nullid` rev
        if node == self.nullid:
            return b""

        # ``rawtext`` is the text as stored inside the revlog. Might be the
        # revision or might need to be processed to retrieve the revision.
        rev, rawtext, validated = self._rawtext(node, rev)

        if raw and validated:
            # if we don't want to process the raw text and the raw
            # text is cached, we can exit early.
            return rawtext
        if rev is None:
            rev = self.rev(node)
        # the revlog's flags for this revision
        # (usually alter its state or content)
        flags = self.flags(rev)

        if validated and flags == REVIDX_DEFAULT_FLAGS:
            # no extra flags set, no flag processor runs, text = rawtext
            return rawtext

        if raw:
            validatehash = flagutil.processflagsraw(self, rawtext, flags)
            text = rawtext
        else:
            r = flagutil.processflagsread(self, rawtext, flags)
            text, validatehash = r
        if validatehash:
            self.checkhash(text, node, rev=rev)
        if not validated:
            self._revisioncache = (node, rev, rawtext)

        return text

    def _rawtext(self, node, rev):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)
        """

        # revision in the cache (could be useful to apply delta)
        cachedrev = None
        # An intermediate text to apply deltas to
        basetext = None

        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._revisioncache:
            if self._revisioncache[0] == node:
                return (rev, self._revisioncache[2], True)
            cachedrev = self._revisioncache[1]

        if rev is None:
            rev = self.rev(node)

        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            basetext = self._revisioncache[2]

        # drop cache to save memory, the caller is expected to
        # update self._revisioncache after validating the text
        self._revisioncache = None

        targetsize = None
        rawsize = self.index[rev][2]
        if 0 <= rawsize:
            targetsize = 4 * rawsize

        bins = self._chunks(chain, targetsize=targetsize)
        if basetext is None:
            basetext = bytes(bins[0])
            bins = bins[1:]

        rawtext = mdiff.patches(basetext, bins)
        del basetext  # let us have a chance to free memory early
        return (rev, rawtext, False)

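    # Reconstruction sketch (illustrative): a raw text is rebuilt by
    # fetching the delta chain and folding the deltas over the base text,
    # exactly as done above:
    #
    #   chain, stopped = rl._deltachain(rev)
    #   bins = rl._chunks(chain)
    #   rawtext = mdiff.patches(bytes(bins[0]), bins[1:])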
    def _sidedata(self, rev):
        """Return the sidedata for a given revision number."""
        index_entry = self.index[rev]
        sidedata_offset = index_entry[8]
        sidedata_size = index_entry[9]

        if self._inline:
            sidedata_offset += self.index.entry_size * (1 + rev)
        if sidedata_size == 0:
            return {}

        if self._docket.sidedata_end < sidedata_offset + sidedata_size:
            filename = self._sidedatafile
            end = self._docket.sidedata_end
            offset = sidedata_offset
            length = sidedata_size
            m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
            raise error.RevlogError(m)

        comp_segment = self._segmentfile_sidedata.read_chunk(
            sidedata_offset, sidedata_size
        )

        comp = self.index[rev][11]
        if comp == COMP_MODE_PLAIN:
            segment = comp_segment
        elif comp == COMP_MODE_DEFAULT:
            segment = self._decompressor(comp_segment)
        elif comp == COMP_MODE_INLINE:
            segment = self.decompress(comp_segment)
        else:
            msg = b'unknown compression mode %d'
            msg %= comp
            raise error.RevlogError(msg)

        sidedata = sidedatautil.deserialize_sidedata(segment)
        return sidedata

    def rawdata(self, nodeorrev):
        """return the uncompressed raw data of a given node or revision number."""
        return self._revisiondata(nodeorrev, raw=True)

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

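    # Hash sketch (illustrative): ``storageutil.hashrevisionsha1`` hashes
    # the two parent nodes in sorted order followed by the text, so parent
    # order does not change the resulting nodeid:
    #
    #   node = sha1(min(p1, p2) + max(p1, p2) + text).digest()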
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if self._revisioncache and self._revisioncache[0] == node:
                    self._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(
                    _(b"integrity check failed on %s:%s")
                    % (self.display_id, pycompat.bytestr(revornode))
                )
        except error.RevlogError:
            if self.feature_config.censorable and storageutil.iscensoredtext(
                text
            ):
                raise error.CensoredNodeError(self.display_id, node, text)
            raise

    @property
    def _split_index_file(self):
        """the path at which to expect the index of an ongoing splitting
        operation

        The file will only exist if a splitting operation is in progress, but
        it is always expected at the same location."""
        parts = self.radix.split(b'/')
        if len(parts) > 1:
            # adds a '-s' suffix to the ``data/`` or ``meta/`` base
            head = parts[0] + b'-s'
            mids = parts[1:-1]
            tail = parts[-1] + b'.i'
            pieces = [head] + mids + [tail]
            return b'/'.join(pieces)
        else:
            # the revlog is stored at the root of the store (changelog or
            # manifest), no risk of collision.
            return self.radix + b'.i.s'

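    # Path sketch (illustrative): for a filelog with radix ``data/foo`` the
    # split index is expected at ``data-s/foo.i``; for the changelog (radix
    # ``00changelog``) it is ``00changelog.i.s``.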
    def _enforceinlinesize(self, tr, side_write=True):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        total_size = self.start(tiprev) + self.length(tiprev)
        if not self._inline or total_size < _maxinline:
            return

        troffset = tr.findoffset(self._indexfile)
        if troffset is None:
            raise error.RevlogError(
                _(b"%s not found in the transaction") % self._indexfile
            )
        if troffset:
            tr.addbackup(self._indexfile, for_offset=True)
        tr.add(self._datafile, 0)

        existing_handles = False
        if self._writinghandles is not None:
            existing_handles = True
            fp = self._writinghandles[0]
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None
            self._segmentfile.writing_handle = None
            # No need to deal with sidedata writing handle as it is only
            # relevant with revlog-v2 which is never inline, not reaching
            # this code
        if side_write:
            old_index_file_path = self._indexfile
            new_index_file_path = self._split_index_file
            opener = self.opener
            weak_self = weakref.ref(self)

            # the "split" index replaces the real index when the transaction
            # is finalized
            def finalize_callback(tr):
                opener.rename(
                    new_index_file_path,
                    old_index_file_path,
                    checkambig=True,
                )
                maybe_self = weak_self()
                if maybe_self is not None:
                    maybe_self._indexfile = old_index_file_path

            def abort_callback(tr):
                maybe_self = weak_self()
                if maybe_self is not None:
                    maybe_self._indexfile = old_index_file_path

            tr.registertmp(new_index_file_path)
            if self.target[1] is not None:
                callback_id = b'000-revlog-split-%d-%s' % self.target
            else:
                callback_id = b'000-revlog-split-%d' % self.target[0]
            tr.addfinalize(callback_id, finalize_callback)
            tr.addabort(callback_id, abort_callback)

        new_dfh = self._datafp(b'w+')
        new_dfh.truncate(0)  # drop any potentially existing data
        try:
            with self.reading():
                for r in self:
                    new_dfh.write(self._getsegmentforrevs(r, r)[1])
                new_dfh.flush()

            if side_write:
                self._indexfile = new_index_file_path
            with self.__index_new_fp() as fp:
                self._format_flags &= ~FLAG_INLINE_DATA
                self._inline = False
                for i in self:
                    e = self.index.entry_binary(i)
                    if i == 0 and self._docket is None:
                        header = self._format_flags | self._format_version
                        header = self.index.pack_header(header)
                        e = header + e
                    fp.write(e)
                if self._docket is not None:
                    self._docket.index_end = fp.tell()

                # If we don't use side-write, the temp file replaces the
                # real index when we exit the context manager

            nodemaputil.setup_persistent_nodemap(tr, self)
            self._segmentfile = randomaccessfile.randomaccessfile(
                self.opener,
                self._datafile,
                self.data_config.chunk_cache_size,
            )

            if existing_handles:
                # switched from inline to conventional; reopen the index
                ifh = self.__index_write_fp()
                self._writinghandles = (ifh, new_dfh, None)
                self._segmentfile.writing_handle = new_dfh
                new_dfh = None
                # No need to deal with sidedata writing handle as it is only
                # relevant with revlog-v2 which is never inline, not reaching
                # this code
        finally:
            if new_dfh is not None:
                new_dfh.close()

    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored."""

    @contextlib.contextmanager
    def reading(self):
        """Context manager that keeps data and sidedata files open for reading"""
        if len(self.index) == 0:
            yield  # nothing to be read
        else:
            with self._segmentfile.reading():
                with self._segmentfile_sidedata.reading():
                    yield

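    # Usage sketch (illustrative): batch reads should be wrapped in
    # ``reading()`` so the underlying files are opened only once:
    #
    #   with rl.reading():
    #       texts = [rl.revision(r) for r in rl]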
    @contextlib.contextmanager
    def _writing(self, transaction):
        if self._trypending:
            msg = b'try to write in a `trypending` revlog: %s'
            msg %= self.display_id
            raise error.ProgrammingError(msg)
        if self._writinghandles is not None:
            yield
        else:
            ifh = dfh = sdfh = None
            try:
                r = len(self)
                # opening the data file.
                dsize = 0
                if r:
                    dsize = self.end(r - 1)
                dfh = None
                if not self._inline:
                    try:
                        dfh = self._datafp(b"r+")
                        if self._docket is None:
                            dfh.seek(0, os.SEEK_END)
                        else:
                            dfh.seek(self._docket.data_end, os.SEEK_SET)
                    except FileNotFoundError:
                        dfh = self._datafp(b"w+")
                    transaction.add(self._datafile, dsize)
                if self._sidedatafile is not None:
                    # revlog-v2 does not inline, help Pytype
                    assert dfh is not None
                    try:
                        sdfh = self.opener(self._sidedatafile, mode=b"r+")
                        dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
                    except FileNotFoundError:
                        sdfh = self.opener(self._sidedatafile, mode=b"w+")
                    transaction.add(
                        self._sidedatafile, self._docket.sidedata_end
                    )

                # opening the index file.
                isize = r * self.index.entry_size
                ifh = self.__index_write_fp()
                if self._inline:
                    transaction.add(self._indexfile, dsize + isize)
                else:
                    transaction.add(self._indexfile, isize)
                # exposing all file handles for writing.
                self._writinghandles = (ifh, dfh, sdfh)
                self._segmentfile.writing_handle = ifh if self._inline else dfh
                self._segmentfile_sidedata.writing_handle = sdfh
                yield
                if self._docket is not None:
                    self._write_docket(transaction)
            finally:
                self._writinghandles = None
                self._segmentfile.writing_handle = None
                self._segmentfile_sidedata.writing_handle = None
                if dfh is not None:
                    dfh.close()
                if sdfh is not None:
                    sdfh.close()
                # closing the index file last, to avoid exposing references
                # to potentially unflushed data content.
                if ifh is not None:
                    ifh.close()

    def _write_docket(self, transaction):
        """write the current docket on disk

        Exists as a method to help the changelog implement its transaction
        logic

        We could also imagine using the same transaction logic for all
        revlogs since dockets are cheap."""
        self._docket.write(transaction)

    def addrevision(
        self,
        text,
        transaction,
        link,
        p1,
        p2,
        cachedelta=None,
        node=None,
        flags=REVIDX_DEFAULT_FLAGS,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
        computed by default as hash(text, p1, p2), however subclasses might
        use a different hashing method (and override checkhash() in such case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
        multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(
                _(b"attempted to add linkrev -1 to %s") % self.display_id
            )

        if sidedata is None:
            sidedata = {}
        elif sidedata and not self.feature_config.has_side_data:
            raise error.ProgrammingError(
                _(b"trying to add sidedata to a revlog that doesn't support them")
            )

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = flagutil.processflagswrite(self, text, flags)

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _(
                    b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
                )
                % (self.display_id, len(rawtext))
            )

        node = node or self.hash(rawtext, p1, p2)
        rev = self.index.get_rev(node)
        if rev is not None:
            return rev

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(
            rawtext,
            transaction,
            link,
            p1,
            p2,
            node,
            flags,
            cachedelta=cachedelta,
            deltacomputer=deltacomputer,
            sidedata=sidedata,
        )

    def addrawrevision(
        self,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        node,
        flags,
        cachedelta=None,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (ex: received
        over the wire, or read from an external bundle).
        """
        with self._writing(transaction):
            return self._addrevision(
                node,
                rawtext,
                transaction,
                link,
                p1,
                p2,
                flags,
                cachedelta,
                deltacomputer=deltacomputer,
                sidedata=sidedata,
            )

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return b'', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return b'', compressed

        if data[0:1] == b'\0':
            return b'', data
        return b'u', data

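    # Header sketch (illustrative): an empty header means the payload
    # identifies itself (a compressor header, or raw data starting with
    # '\0'); b'u' marks uncompressed data that needs an explicit marker:
    #
    #   (b'', zlib_blob)  # compressed, blob starts with 'x'
    #   (b'u', b'hello')  # stored uncompressed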
    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == b'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(
                    _(b'revlog decompress error: %s')
                    % stringutil.forcebytestr(e)
                )
        # '\0' is more common than 'u' so it goes first.
        elif t == b'\0':
            return data
        elif t == b'u':
            return util.buffer(data, 1)

        compressor = self._get_decompressor(t)

        return compressor.decompress(data)

    def _addrevision(
        self,
        node,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        flags,
        cachedelta,
        alwayscache=False,
        deltacomputer=None,
        sidedata=None,
    ):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == self.nullid:
            raise error.RevlogError(
                _(b"%s: attempt to add null revision") % self.display_id
            )
        if (
            node == self.nodeconstants.wdirid
            or node in self.nodeconstants.wdirfilenodeids
        ):
            raise error.RevlogError(
                _(b"%s: attempt to add wdir revision") % self.display_id
            )
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1

        offset = self._get_data_offset(prev)

        if self._concurrencychecker:
            ifh, dfh, sdfh = self._writinghandles
            # XXX no checking for the sidedata file
            if self._inline:
                # offset is "as if" it were in the .d file, so we need to add on
                # the size of the entry metadata.
                self._concurrencychecker(
                    ifh, self._indexfile, offset + curr * self.index.entry_size
                )
            else:
                # Entries in the .i are a consistent size.
                self._concurrencychecker(
                    ifh, self._indexfile, curr * self.index.entry_size
                )
                self._concurrencychecker(dfh, self._datafile, offset)

        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(
                revlog.size(self, cachedelta[0]), cachedelta[1]
            )
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            write_debug = None
            if self.delta_config.debug_delta:
                write_debug = transaction._report
            deltacomputer = deltautil.deltacomputer(
                self, write_debug=write_debug
            )

        if cachedelta is not None and len(cachedelta) == 2:
            # If the cached delta has no information about how it should be
            # reused, add the default reuse instruction according to the
            # revlog's configuration.
            if (
                self.delta_config.general_delta
                and self.delta_config.lazy_delta_base
            ):
                delta_base_reuse = DELTA_BASE_REUSE_TRY
            else:
                delta_base_reuse = DELTA_BASE_REUSE_NO
            cachedelta = (cachedelta[0], cachedelta[1], delta_base_reuse)

        revinfo = revlogutils.revisioninfo(
            node,
            p1,
            p2,
            btext,
            textlen,
            cachedelta,
            flags,
        )

        deltainfo = deltacomputer.finddeltainfo(revinfo)

        compression_mode = COMP_MODE_INLINE
        if self._docket is not None:
            default_comp = self._docket.default_compression_header
            r = deltautil.delta_compression(default_comp, deltainfo)
            compression_mode, deltainfo = r

        sidedata_compression_mode = COMP_MODE_INLINE
        if sidedata and self.feature_config.has_side_data:
            sidedata_compression_mode = COMP_MODE_PLAIN
            serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
            sidedata_offset = self._docket.sidedata_end
            h, comp_sidedata = self.compress(serialized_sidedata)
            if (
                h != b'u'
                and comp_sidedata[0:1] != b'\0'
                and len(comp_sidedata) < len(serialized_sidedata)
            ):
                assert not h
                if (
                    comp_sidedata[0:1]
                    == self._docket.default_compression_header
                ):
                    sidedata_compression_mode = COMP_MODE_DEFAULT
                    serialized_sidedata = comp_sidedata
                else:
                    sidedata_compression_mode = COMP_MODE_INLINE
                    serialized_sidedata = comp_sidedata
        else:
            serialized_sidedata = b""
            # Don't store the offset if the sidedata is empty, that way
            # we can easily detect empty sidedata and they will be no different
            # than ones we manually add.
            sidedata_offset = 0

        rank = RANK_UNKNOWN
        if self.feature_config.compute_rank:
            if (p1r, p2r) == (nullrev, nullrev):
                rank = 1
            elif p1r != nullrev and p2r == nullrev:
                rank = 1 + self.fast_rank(p1r)
            elif p1r == nullrev and p2r != nullrev:
                rank = 1 + self.fast_rank(p2r)
            else:  # merge node
                if rustdagop is not None and self.index.rust_ext_compat:
                    rank = rustdagop.rank(self.index, p1r, p2r)
                else:
                    pmin, pmax = sorted((p1r, p2r))
                    rank = 1 + self.fast_rank(pmax)
                    rank += sum(1 for _ in self.findmissingrevs([pmax], [pmin]))
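        # Note on the merge branch above (descriptive only): a revision's
        # rank is the size of its ancestor set, itself included. Starting
        # from the higher-ranked parent, findmissingrevs([pmax], [pmin])
        # yields the ancestors of pmin that pmax does not already account
        # for, so adding their count completes the union without double
        # counting.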

        e = revlogutils.entry(
            flags=flags,
            data_offset=offset,
            data_compressed_length=deltainfo.deltalen,
            data_uncompressed_length=textlen,
            data_compression_mode=compression_mode,
            data_delta_base=deltainfo.base,
            link_rev=link,
            parent_rev_1=p1r,
            parent_rev_2=p2r,
            node_id=node,
            sidedata_offset=sidedata_offset,
            sidedata_compressed_length=len(serialized_sidedata),
            sidedata_compression_mode=sidedata_compression_mode,
            rank=rank,
        )

        self.index.append(e)
        entry = self.index.entry_binary(curr)
        if curr == 0 and self._docket is None:
            header = self._format_flags | self._format_version
            header = self.index.pack_header(header)
            entry = header + entry
        self._writeentry(
            transaction,
            entry,
            deltainfo.data,
            link,
            offset,
            serialized_sidedata,
            sidedata_offset,
        )

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo)

        if type(rawtext) == bytes:  # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return curr

    def _get_data_offset(self, prev):
        """Returns the current offset in the (in-transaction) data file.

        Versions < 2 of the revlog can get this in O(1), while revlog v2
        needs a docket file to store that information: since sidedata can
        be rewritten to the end of the data file within a transaction, you
        can have cases where, for example, rev `n` does not have sidedata
        while rev `n - 1` does, leading to `n - 1`'s sidedata being written
        after `n`'s data.

        TODO cache this in a docket file before getting out of experimental."""
        if self._docket is None:
            return self.end(prev)
        else:
            return self._docket.data_end

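    # Illustration of the docket case above (an assumed scenario, for
    # clarity only): if rev 5's sidedata is rewritten to the end of the
    # data file after rev 6 was appended, `self.end(6)` no longer matches
    # the real end of the file; only `self._docket.data_end` does. That is
    # why the docket-backed branch never derives the offset from `end()`.
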
    def _writeentry(
        self, transaction, entry, data, link, offset, sidedata, sidedata_offset
    ):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)
        ifh, dfh, sdfh = self._writinghandles
        if self._docket is None:
            ifh.seek(0, os.SEEK_END)
        else:
            ifh.seek(self._docket.index_end, os.SEEK_SET)
        if dfh:
            if self._docket is None:
                dfh.seek(0, os.SEEK_END)
            else:
                dfh.seek(self._docket.data_end, os.SEEK_SET)
        if sdfh:
            sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self._datafile, offset)
            if self._sidedatafile:
                transaction.add(self._sidedatafile, sidedata_offset)
            transaction.add(self._indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            if sidedata:
                sdfh.write(sidedata)
            ifh.write(entry)
        else:
            offset += curr * self.index.entry_size
            transaction.add(self._indexfile, offset)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            assert not sidedata
            self._enforceinlinesize(transaction)
        if self._docket is not None:
            # revlog-v2 always has 3 writing handles, help Pytype
            wh1 = self._writinghandles[0]
            wh2 = self._writinghandles[1]
            wh3 = self._writinghandles[2]
            assert wh1 is not None
            assert wh2 is not None
            assert wh3 is not None
            self._docket.index_end = wh1.tell()
            self._docket.data_end = wh2.tell()
            self._docket.sidedata_end = wh3.tell()

        nodemaputil.setup_persistent_nodemap(transaction, self)

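    # Layout recap for the two write paths above (a descriptive sketch, not
    # normative): a split revlog keeps fixed-size entries in the .i file and
    # chunk data in the .d file, each appended at its own offset; an inline
    # revlog interleaves each entry with its chunk data in the .i file, so
    # the truncation point recorded with the transaction must account for
    # `curr * entry_size` bytes of index entries on top of the data offset.
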
    def addgroup(
        self,
        deltas,
        linkmapper,
        transaction,
        alwayscache=False,
        addrevisioncb=None,
        duplicaterevisioncb=None,
        debug_info=None,
        delta_base_reuse_policy=None,
    ):
        """
        add a delta group

        Given a set of deltas, add them to the revision log. The first delta
        is against its parent, which should be in our log; the rest are
        against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        if self._adding_group:
            raise error.ProgrammingError(b'cannot nest addgroup() calls')

        # read the default delta-base reuse policy from revlog config if the
        # group did not specify one.
        if delta_base_reuse_policy is None:
            if (
                self.delta_config.general_delta
                and self.delta_config.lazy_delta_base
            ):
                delta_base_reuse_policy = DELTA_BASE_REUSE_TRY
            else:
                delta_base_reuse_policy = DELTA_BASE_REUSE_NO

        self._adding_group = True
        empty = True
        try:
            with self._writing(transaction):
                write_debug = None
                if self.delta_config.debug_delta:
                    write_debug = transaction._report
                deltacomputer = deltautil.deltacomputer(
                    self,
                    write_debug=write_debug,
                    debug_info=debug_info,
                )
                # loop through our set of deltas
                for data in deltas:
                    (
                        node,
                        p1,
                        p2,
                        linknode,
                        deltabase,
                        delta,
                        flags,
                        sidedata,
                    ) = data
                    link = linkmapper(linknode)
                    flags = flags or REVIDX_DEFAULT_FLAGS

                    rev = self.index.get_rev(node)
                    if rev is not None:
                        # this can happen if two branches make the same change
                        self._nodeduplicatecallback(transaction, rev)
                        if duplicaterevisioncb:
                            duplicaterevisioncb(self, rev)
                        empty = False
                        continue

                    for p in (p1, p2):
                        if not self.index.has_node(p):
                            raise error.LookupError(
                                p, self.radix, _(b'unknown parent')
                            )

                    if not self.index.has_node(deltabase):
                        raise error.LookupError(
                            deltabase, self.display_id, _(b'unknown delta base')
                        )

                    baserev = self.rev(deltabase)

                    if baserev != nullrev and self.iscensored(baserev):
                        # if base is censored, delta must be full replacement in a
                        # single patch operation
                        hlen = struct.calcsize(b">lll")
                        oldlen = self.rawsize(baserev)
                        newlen = len(delta) - hlen
                        if delta[:hlen] != mdiff.replacediffheader(
                            oldlen, newlen
                        ):
                            raise error.CensoredBaseError(
                                self.display_id, self.node(baserev)
                            )

                    if not flags and self._peek_iscensored(baserev, delta):
                        flags |= REVIDX_ISCENSORED

                    # We assume consumers of addrevisioncb will want to retrieve
                    # the added revision, which will require a call to
                    # revision(). revision() will fast path if there is a cache
                    # hit. So, we tell _addrevision() to always cache in this case.
                    # We're only using addgroup() in the context of changegroup
                    # generation so the revision data can always be handled as raw
                    # by the flagprocessor.
                    rev = self._addrevision(
                        node,
                        None,
                        transaction,
                        link,
                        p1,
                        p2,
                        flags,
                        (baserev, delta, delta_base_reuse_policy),
                        alwayscache=alwayscache,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

                    if addrevisioncb:
                        addrevisioncb(self, rev)
                    empty = False
        finally:
            self._adding_group = False
        return not empty

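    # Shape of the `deltas` stream consumed above (as unpacked in the loop;
    # descriptive only): an iterable of (node, p1, p2, linknode, deltabase,
    # delta, flags, sidedata) tuples. Each delta base must already be known
    # to the revlog when its entry is processed, either pre-existing or
    # added by an earlier tuple of the same group.
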
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self.feature_config.censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta):
        """Quickly check if a delta produces a censored revision."""
        if not self.feature_config.censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(
            minlink,
            len(self) - 1,
            self.headrevs(),
            self.linkrev,
            self.parentrevs,
        )

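    # Example (hypothetical numbers): in a revlog where every rev's linkrev
    # equals its own number, getstrippoint(7) would return (7, set()) --
    # rev 7 is the first to truncate, and nothing below it links past the
    # strip point. The returned set is only non-empty when linkrevs are not
    # monotonic, i.e. some rev below the strip point links to a changeset
    # that will be stripped.
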
    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        data_end = self.start(rev)
        if not self._inline:
            transaction.add(self._datafile, data_end)
            end = rev * self.index.entry_size
        else:
            end = data_end + (rev * self.index.entry_size)

        if self._sidedatafile:
            sidedata_end = self.sidedata_cut_off(rev)
            transaction.add(self._sidedatafile, sidedata_end)

        transaction.add(self._indexfile, end)
        if self._docket is not None:
            # XXX we could leverage the docket while stripping. However it is
            # not powerful enough at the time of this comment.
            self._docket.index_end = end
            self._docket.data_end = data_end
            self._docket.sidedata_end = sidedata_end
            self._docket.write(transaction, stripping=True)

        # then reset internal state in memory to forget those revisions
        self._revisioncache = None
        self._chaininfocache = util.lrucachedict(500)
        self._segmentfile.clear_cache()
        self._segmentfile_sidedata.clear_cache()

        del self.index[rev:-1]

    def checksize(self):
        """Check size of index and data files

        return a (dd, di) tuple.
        - dd: extra bytes for the "data" file
        - di: extra bytes for the "index" file

        A healthy revlog will return (0, 0).
        """
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, io.SEEK_END)
                actual = f.tell()
            dd = actual - expected
        except FileNotFoundError:
            dd = 0

        try:
            f = self.opener(self._indexfile)
            f.seek(0, io.SEEK_END)
            actual = f.tell()
            f.close()
            s = self.index.entry_size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except FileNotFoundError:
            di = 0

        return (dd, di)

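    # Reading the result (illustrative): (0, 0) is healthy; dd > 0 means
    # the data file has bytes past the offset the last index entry accounts
    # for (e.g. after an interrupted append); di > 0 means the index file
    # has trailing bytes that do not form a whole entry.
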
    def files(self):
        res = [self._indexfile]
        if self._docket_file is None:
            if not self._inline:
                res.append(self._datafile)
        else:
            res.append(self._docket_file)
            res.extend(self._docket.old_index_filepaths(include_empty=False))
            if self._docket.data_end:
                res.append(self._datafile)
            res.extend(self._docket.old_data_filepaths(include_empty=False))
            if self._docket.sidedata_end:
                res.append(self._sidedatafile)
            res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
        return res

    def emitrevisions(
        self,
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltamode=repository.CG_DELTAMODE_STD,
        sidedata_helpers=None,
        debug_info=None,
    ):
        if nodesorder not in (b'nodes', b'storage', b'linear', None):
            raise error.ProgrammingError(
                b'unhandled value for nodesorder: %s' % nodesorder
            )

        if nodesorder is None and not self.delta_config.general_delta:
            nodesorder = b'storage'

        if (
            not self._storedeltachains
            and deltamode != repository.CG_DELTAMODE_PREV
        ):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self,
            nodes,
            nodesorder,
            revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self._candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
            sidedata_helpers=sidedata_helpers,
            debug_info=debug_info,
        )

    DELTAREUSEALWAYS = b'always'
    DELTAREUSESAMEREVS = b'samerevs'
    DELTAREUSENEVER = b'never'

    DELTAREUSEFULLADD = b'fulladd'

    DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}

    def clone(
        self,
        tr,
        destrevlog,
        addrevisioncb=None,
        deltareuse=DELTAREUSESAMEREVS,
        forcedeltabothparents=None,
        sidedata_helpers=None,
    ):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta
        encoding differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is
          the fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).
        DELTAREUSEFULLADD
          Revisions will be re-added as if they were new content. This is
          slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
          e.g. large file detection and handling.

        Delta computation can be slow, so the choice of delta reuse policy
        can significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if
        the delta could choose a better revision, it will do so. This means
        if you are converting a non-generaldelta revlog to a generaldelta
        revlog, deltas will be recomputed if the delta's parent isn't a
        parent of the revision.

        In addition to the delta policy, the ``forcedeltabothparents``
        argument controls whether to force compute deltas against both
        parents for merges. If not set, the destination's current
        configuration is used.

        See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
        `sidedata_helpers`.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(
                _(b'value for deltareuse invalid: %s') % deltareuse
            )

        if len(destrevlog):
            raise ValueError(_(b'destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_(b'source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_(b'destination revlog has filtered revisions'))

        # lazydelta and lazydeltabase control whether to reuse a cached delta,
        # if possible.
        old_delta_config = destrevlog.delta_config
        destrevlog.delta_config = destrevlog.delta_config.copy()

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog.delta_config.lazy_delta_base = True
                destrevlog.delta_config.lazy_delta = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog.delta_config.lazy_delta_base = False
                destrevlog.delta_config.lazy_delta = True
            elif deltareuse == self.DELTAREUSENEVER:
                destrevlog.delta_config.lazy_delta_base = False
                destrevlog.delta_config.lazy_delta = False

            delta_both_parents = (
                forcedeltabothparents or old_delta_config.delta_both_parents
            )
            destrevlog.delta_config.delta_both_parents = delta_both_parents

            with self.reading(), destrevlog._writing(tr):
                self._clone(
                    tr,
                    destrevlog,
                    addrevisioncb,
                    deltareuse,
                    forcedeltabothparents,
                    sidedata_helpers,
                )

        finally:
            destrevlog.delta_config = old_delta_config

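    # Typical call shape (a hedged sketch; `repo`, `src` and `dst` are
    # assumed names, not part of this module): upgrade-style code opens an
    # empty destination revlog and copies everything under one transaction:
    #
    #   with repo.transaction(b'clone-revlog') as tr:
    #       src.clone(tr, dst, deltareuse=src.DELTAREUSESAMEREVS)
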
    def _clone(
        self,
        tr,
        destrevlog,
        addrevisioncb,
        deltareuse,
        forcedeltabothparents,
        sidedata_helpers,
    ):
        """perform the core duty of `revlog.clone` after parameter processing"""
        write_debug = None
        if self.delta_config.debug_delta:
            write_debug = tr._report
        deltacomputer = deltautil.deltacomputer(
            destrevlog,
            write_debug=write_debug,
        )
        index = self.index
        for rev in self:
            entry = index[rev]

            # Some classes override linkrev to take filtered revs into
            # account. Use raw entry from index.
            flags = entry[0] & 0xFFFF
            linkrev = entry[4]
            p1 = index[entry[5]][7]
            p2 = index[entry[6]][7]
            node = entry[7]

            # (Possibly) reuse the delta from the revlog if allowed and
            # the revlog chunk is a delta.
            cachedelta = None
            rawtext = None
            if deltareuse == self.DELTAREUSEFULLADD:
                text = self._revisiondata(rev)
                sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                destrevlog.addrevision(
                    text,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    cachedelta=cachedelta,
                    node=node,
                    flags=flags,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )
            else:
                if destrevlog.delta_config.lazy_delta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                sidedata = None
                if not cachedelta:
                    try:
                        rawtext = self._revisiondata(rev)
                    except error.CensoredNodeError as censored:
                        assert flags & REVIDX_ISCENSORED
                        rawtext = censored.tombstone
                    sidedata = self.sidedata(rev)
                if sidedata is None:
                    sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                destrevlog._addrevision(
                    node,
                    rawtext,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    flags,
                    cachedelta,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )

            if addrevisioncb:
                addrevisioncb(self, rev, node)

    def censorrevision(self, tr, censornode, tombstone=b''):
        if self._format_version == REVLOGV0:
            raise error.RevlogError(
                _(b'cannot censor with version %d revlogs')
                % self._format_version
            )
        elif self._format_version == REVLOGV1:
            rewrite.v1_censor(self, tr, censornode, tombstone)
        else:
            rewrite.v2_censor(self, tr, censornode, tombstone)

3582 def verifyintegrity(self, state):
3578 def verifyintegrity(self, state):
3583 """Verifies the integrity of the revlog.
3579 """Verifies the integrity of the revlog.
3584
3580
3585 Yields ``revlogproblem`` instances describing problems that are
3581 Yields ``revlogproblem`` instances describing problems that are
3586 found.
3582 found.
3587 """
3583 """
3588 dd, di = self.checksize()
3584 dd, di = self.checksize()
3589 if dd:
3585 if dd:
3590 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3586 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3591 if di:
3587 if di:
3592 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3588 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3593
3589
3594 version = self._format_version
3590 version = self._format_version
3595
3591
3596 # The verifier tells us what version revlog we should be.
3592 # The verifier tells us what version revlog we should be.
3597 if version != state[b'expectedversion']:
3593 if version != state[b'expectedversion']:
3598 yield revlogproblem(
3594 yield revlogproblem(
3599 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3595 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3600 % (self.display_id, version, state[b'expectedversion'])
3596 % (self.display_id, version, state[b'expectedversion'])
3601 )
3597 )
3602
3598
3603 state[b'skipread'] = set()
3599 state[b'skipread'] = set()
3604 state[b'safe_renamed'] = set()
3600 state[b'safe_renamed'] = set()
3605
3601
3606 for rev in self:
3602 for rev in self:
3607 node = self.node(rev)
3603 node = self.node(rev)
3608
3604
3609 # Verify contents. 4 cases to care about:
3605 # Verify contents. 4 cases to care about:
3610 #
3606 #
3611 # common: the most common case
3607 # common: the most common case
3612 # rename: with a rename
3608 # rename: with a rename
3613 # meta: file content starts with b'\1\n', the metadata
3609 # meta: file content starts with b'\1\n', the metadata
3614 # header defined in filelog.py, but without a rename
3610 # header defined in filelog.py, but without a rename
3615 # ext: content stored externally
3611 # ext: content stored externally
3616 #
3612 #
3617 # More formally, their differences are shown below:
3613 # More formally, their differences are shown below:
3618 #
3614 #
3619 # | common | rename | meta | ext
3615 # | common | rename | meta | ext
3620 # -------------------------------------------------------
3616 # -------------------------------------------------------
3621 # flags() | 0 | 0 | 0 | not 0
3617 # flags() | 0 | 0 | 0 | not 0
3622 # renamed() | False | True | False | ?
3618 # renamed() | False | True | False | ?
3623 # rawtext[0:2]=='\1\n'| False | True | True | ?
3619 # rawtext[0:2]=='\1\n'| False | True | True | ?
3624 #
3620 #
3625 # "rawtext" means the raw text stored in revlog data, which
3621 # "rawtext" means the raw text stored in revlog data, which
3626 # could be retrieved by "rawdata(rev)". "text"
3622 # could be retrieved by "rawdata(rev)". "text"
3627 # mentioned below is "revision(rev)".
3623 # mentioned below is "revision(rev)".
3628 #
3624 #
3629 # There are 3 different lengths stored physically:
3625 # There are 3 different lengths stored physically:
3630 # 1. L1: rawsize, stored in revlog index
3626 # 1. L1: rawsize, stored in revlog index
3631 # 2. L2: len(rawtext), stored in revlog data
3627 # 2. L2: len(rawtext), stored in revlog data
3632 # 3. L3: len(text), stored in revlog data if flags==0, or
3628 # 3. L3: len(text), stored in revlog data if flags==0, or
3633 # possibly somewhere else if flags!=0
3629 # possibly somewhere else if flags!=0
3634 #
3630 #
3635 # L1 should be equal to L2. L3 could be different from them.
3631 # L1 should be equal to L2. L3 could be different from them.
3636 # "text" may or may not affect commit hash depending on flag
3632 # "text" may or may not affect commit hash depending on flag
3637 # processors (see flagutil.addflagprocessor).
3633 # processors (see flagutil.addflagprocessor).
3638 #
3634 #
3639 # | common | rename | meta | ext
3635 # | common | rename | meta | ext
3640 # -------------------------------------------------
3636 # -------------------------------------------------
3641 # rawsize() | L1 | L1 | L1 | L1
3637 # rawsize() | L1 | L1 | L1 | L1
3642 # size() | L1 | L2-LM | L1(*) | L1 (?)
3638 # size() | L1 | L2-LM | L1(*) | L1 (?)
3643 # len(rawtext) | L2 | L2 | L2 | L2
3639 # len(rawtext) | L2 | L2 | L2 | L2
3644 # len(text) | L2 | L2 | L2 | L3
3640 # len(text) | L2 | L2 | L2 | L3
3645 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3641 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3646 #
3642 #
3647 # LM: length of metadata, depending on rawtext
3643 # LM: length of metadata, depending on rawtext
3648 # (*): not ideal, see comment in filelog.size
3644 # (*): not ideal, see comment in filelog.size
3649 # (?): could be "- len(meta)" if the resolved content has
3645 # (?): could be "- len(meta)" if the resolved content has
3650 # rename metadata
3646 # rename metadata
3651 #
3647 #
3652 # Checks needed to be done:
3648 # Checks needed to be done:
3653 # 1. length check: L1 == L2, in all cases.
3649 # 1. length check: L1 == L2, in all cases.
3654 # 2. hash check: depending on flag processor, we may need to
3650 # 2. hash check: depending on flag processor, we may need to
3655 # use either "text" (external), or "rawtext" (in revlog).
3651 # use either "text" (external), or "rawtext" (in revlog).
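            #
            # Illustration (hypothetical data, not from any real revlog):
            # a "rename" rawtext carrying the filelog metadata header
            # defined in filelog.py could look like
            #     b'\x01\ncopy: a.txt\ncopyrev: <40 hex chars>\x01\nbody'
            # Here L2 is the length of that whole byte string, LM is the
            # length of the b'\x01\n...\x01\n' envelope, and read() would
            # return only b'body', so len(read()) == L2 - LM as in the
            # "rename" column above.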

            try:
                skipflags = state.get(b'skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                _verify_revision(self, skipflags, state, node)

                l1 = self.rawsize(rev)
                l2 = len(self.rawdata(node))

                if l1 != l2:
                    yield revlogproblem(
                        error=_(b'unpacked size is %d, %d expected') % (l2, l1),
                        node=node,
                    )

            except error.CensoredNodeError:
                if state[b'erroroncensored']:
                    yield revlogproblem(
                        error=_(b'censored file data'), node=node
                    )
                state[b'skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_(b'unpacking %s: %s')
                    % (short(node), stringutil.forcebytestr(e)),
                    node=node,
                )
                state[b'skipread'].add(node)

    def storageinfo(
        self,
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
        d = {}

        if exclusivefiles:
            d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
            if not self._inline:
                d[b'exclusivefiles'].append((self.opener, self._datafile))

        if sharedfiles:
            d[b'sharedfiles'] = []

        if revisionscount:
            d[b'revisionscount'] = len(self)

        if trackedsize:
            d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d[b'storedsize'] = sum(
                self.opener.stat(path).st_size for path in self.files()
            )

        return d
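
    # Example (hypothetical usage sketch, not part of the original module):
    # assuming ``rl`` is an open revlog instance, aggregate storage
    # information can be queried as
    #
    #     info = rl.storageinfo(revisionscount=True, storedsize=True)
    #     # e.g. {b'revisionscount': 42, b'storedsize': 123456}
    #
    # Only the keys whose corresponding keyword argument was requested
    # appear in the returned dict.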

    def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
        if not self.feature_config.has_side_data:
            return
        # revlog formats with sidedata support do not support inline
        assert not self._inline
        if not helpers[1] and not helpers[2]:
            # Nothing to generate or remove
            return

        new_entries = []
        # append the new sidedata
        with self._writing(transaction):
            ifh, dfh, sdfh = self._writinghandles
            dfh.seek(self._docket.sidedata_end, os.SEEK_SET)

            current_offset = sdfh.tell()
            for rev in range(startrev, endrev + 1):
                entry = self.index[rev]
                new_sidedata, flags = sidedatautil.run_sidedata_helpers(
                    store=self,
                    sidedata_helpers=helpers,
                    sidedata={},
                    rev=rev,
                )

                serialized_sidedata = sidedatautil.serialize_sidedata(
                    new_sidedata
                )

                sidedata_compression_mode = COMP_MODE_INLINE
                if serialized_sidedata and self.feature_config.has_side_data:
                    sidedata_compression_mode = COMP_MODE_PLAIN
                    h, comp_sidedata = self.compress(serialized_sidedata)
                    if (
                        h != b'u'
                        and comp_sidedata[0] != b'\0'
                        and len(comp_sidedata) < len(serialized_sidedata)
                    ):
                        assert not h
                        if (
                            comp_sidedata[0]
                            == self._docket.default_compression_header
                        ):
                            sidedata_compression_mode = COMP_MODE_DEFAULT
                            serialized_sidedata = comp_sidedata
                        else:
                            sidedata_compression_mode = COMP_MODE_INLINE
                            serialized_sidedata = comp_sidedata
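                # (Descriptive note, not part of the original source: ``h``
                # is the compression header returned by compress(); b'u'
                # marks data kept uncompressed, so the branch above only
                # switches to the compressed payload when compression
                # actually shrank the sidedata. On Python 3, indexing bytes
                # yields an int, so ``comp_sidedata[0] != b'\0'`` compares
                # an int with bytes and is always true.)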
                if entry[8] != 0 or entry[9] != 0:
                    # rewriting entries that already have sidedata is not
                    # supported yet, because it introduces garbage data in
                    # the revlog.
                    msg = b"rewriting existing sidedata is not supported yet"
                    raise error.Abort(msg)

                # Apply (potential) flags to add and to remove after running
                # the sidedata helpers
                new_offset_flags = entry[0] | flags[0] & ~flags[1]
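                # Illustration (hypothetical values, not in the original):
                # with entry[0] == 0b0101, flags to add == 0b0010 and flags
                # to remove == 0b0001, the result is
                # 0b0101 | (0b0010 & ~0b0001) == 0b0111. ``&`` binds tighter
                # than ``|``, so the removal mask only filters the newly
                # added flags, never the flags already present on the entry.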
                entry_update = (
                    current_offset,
                    len(serialized_sidedata),
                    new_offset_flags,
                    sidedata_compression_mode,
                )

                # the sidedata computation might have moved the file cursors
                # around
                sdfh.seek(current_offset, os.SEEK_SET)
                sdfh.write(serialized_sidedata)
                new_entries.append(entry_update)
                current_offset += len(serialized_sidedata)
            self._docket.sidedata_end = sdfh.tell()

            # rewrite the new index entries
            ifh.seek(startrev * self.index.entry_size)
            for i, e in enumerate(new_entries):
                rev = startrev + i
                self.index.replace_sidedata_info(rev, *e)
                packed = self.index.entry_binary(rev)
                if rev == 0 and self._docket is None:
                    header = self._format_flags | self._format_version
                    header = self.index.pack_header(header)
                    packed = header + packed
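                    # (Descriptive note, not part of the original source:
                    # revlog versions that have no docket embed the format
                    # header in the index file itself; since the offset of
                    # revision 0 is always zero, its leading bytes can carry
                    # the version header instead, which is why the header is
                    # prepended to the first entry here.)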
                ifh.write(packed)