revlog: remove legacy usage of `_checkambig`...
marmoute - r51941:59c6f997 default
@@ -1,3714 +1,3718 b''
# revlog.py - storage back-end for mercurial
# coding: utf8
#
# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""


import binascii
import collections
import contextlib
import io
import os
import struct
import weakref
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullrev,
    sha1nodeconstants,
    short,
    wdirrev,
)
from .i18n import _
from .revlogutils.constants import (
    ALL_KINDS,
    CHANGELOGV2,
    COMP_MODE_DEFAULT,
    COMP_MODE_INLINE,
    COMP_MODE_PLAIN,
    DELTA_BASE_REUSE_NO,
    DELTA_BASE_REUSE_TRY,
    ENTRY_RANK,
    FEATURES_BY_VERSION,
    FLAG_GENERALDELTA,
    FLAG_INLINE_DATA,
    INDEX_HEADER,
    KIND_CHANGELOG,
    KIND_FILELOG,
    RANK_UNKNOWN,
    REVLOGV0,
    REVLOGV1,
    REVLOGV1_FLAGS,
    REVLOGV2,
    REVLOGV2_FLAGS,
    REVLOG_DEFAULT_FLAGS,
    REVLOG_DEFAULT_FORMAT,
    REVLOG_DEFAULT_VERSION,
    SUPPORTED_FLAGS,
)
from .revlogutils.flagutil import (
    REVIDX_DEFAULT_FLAGS,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_FLAGS_ORDER,
    REVIDX_HASCOPIESINFO,
    REVIDX_ISCENSORED,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
)
from .thirdparty import attr
from . import (
    ancestor,
    dagop,
    error,
    mdiff,
    policy,
    pycompat,
    revlogutils,
    templatefilters,
    util,
)
from .interfaces import (
    repository,
    util as interfaceutil,
)
from .revlogutils import (
    deltas as deltautil,
    docket as docketutil,
    flagutil,
    nodemap as nodemaputil,
    randomaccessfile,
    revlogv0,
    rewrite,
    sidedata as sidedatautil,
)
from .utils import (
    storageutil,
    stringutil,
)

# blanket usage of all the names to prevent pyflakes warnings
# We need these names available in the module for extensions.

REVLOGV0
REVLOGV1
REVLOGV2
CHANGELOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_HASCOPIESINFO
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_RAWTEXT_CHANGING_FLAGS

parsers = policy.importmod('parsers')
rustancestor = policy.importrust('ancestor')
rustdagop = policy.importrust('dagop')
rustrevlog = policy.importrust('revlog')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of inline data embedded into a revlog
_maxinline = 131072


# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    return text, False


def ellipsiswriteprocessor(rl, text):
    return text, False


def ellipsisrawprocessor(rl, text):
    return False


ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)


def _verify_revision(rl, skipflags, state, node):
    """Verify the integrity of the given revlog ``node`` while providing a hook
    point for extensions to influence the operation."""
    if skipflags:
        state[b'skipread'].add(node)
    else:
        # Side-effect: read content and verify hash.
        rl.revision(node)


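# Extensions commonly tune verification by wrapping the hook above; a
# hypothetical sketch (MY_FLAG is a placeholder for a real revlog flag):
#
#   def wrapped(orig, rl, skipflags, state, node):
#       skipflags |= MY_FLAG  # also skip revisions carrying MY_FLAG
#       return orig(rl, skipflags, state, node)
#
#   extensions.wrapfunction(revlog, '_verify_revision', wrapped)
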
# True if a fast implementation for persistent-nodemap is available
#
# We also consider we have a "fast" implementation in "pure" python because
# people using pure don't really have performance considerations (and a
# wheelbarrow of other slowness sources)
HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or hasattr(
    parsers, 'BaseIndexObject'
)


@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta:
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    sidedata = attr.ib()
    protocol_flags = attr.ib()
    linknode = attr.ib(default=None)


@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem:
    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)


def parse_index_v1(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline)
    return index, cache


def parse_index_v2(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline, format=REVLOGV2)
    return index, cache


def parse_index_cl_v2(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline, format=CHANGELOGV2)
    return index, cache


if hasattr(parsers, 'parse_index_devel_nodemap'):

    def parse_index_v1_nodemap(data, inline):
        index, cache = parsers.parse_index_devel_nodemap(data, inline)
        return index, cache


else:
    parse_index_v1_nodemap = None


def parse_index_v1_mixed(data, inline):
    index, cache = parse_index_v1(data, inline)
    return rustrevlog.MixedIndex(index), cache


# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7FFFFFFF

FILE_TOO_SHORT_MSG = _(
    b'cannot read from revlog %s;'
    b' expected %d bytes from offset %d, data size is %d'
)

hexdigits = b'0123456789abcdefABCDEF'


class _Config:
    def copy(self):
        return self.__class__(**self.__dict__)


@attr.s()
class FeatureConfig(_Config):
    """Hold configuration values about the available revlog features"""

    # the default compression engine
    compression_engine = attr.ib(default=b'zlib')
    # compression engines options
    compression_engine_options = attr.ib(default=attr.Factory(dict))

    # can we use censor on this revlog
    censorable = attr.ib(default=False)
    # does this revlog use the "side data" feature
    has_side_data = attr.ib(default=False)
    # might remove rank configuration once the computation has no impact
    compute_rank = attr.ib(default=False)
    # parent order is supposed to be semantically irrelevant, so we
    # normally resort parents to ensure that the first parent is non-null,
    # if there is a non-null parent at all.
    # filelog abuses the parent order as a flag to mark some instances of
    # meta-encoded files, so allow it to disable this behavior.
    canonical_parent_order = attr.ib(default=False)
    # can ellipsis commit be used
    enable_ellipsis = attr.ib(default=False)

    def copy(self):
        new = super().copy()
        new.compression_engine_options = self.compression_engine_options.copy()
        return new


@attr.s()
class DataConfig(_Config):
    """Hold configuration values about how the revlog data are read"""

    # should we try to open the "pending" version of the revlog
    try_pending = attr.ib(default=False)
    # should we try to open the "split" version of the revlog
    try_split = attr.ib(default=False)
    # When True, indexfile should be opened with checkambig=True at writing,
    # to avoid file stat ambiguity.
    check_ambig = attr.ib(default=False)

    # If true, use mmap instead of reading to deal with large index
    mmap_large_index = attr.ib(default=False)
    # how much data is large
    mmap_index_threshold = attr.ib(default=None)
    # How much data to read and cache into the raw revlog data cache.
    chunk_cache_size = attr.ib(default=65536)

    # Allow sparse reading of the revlog data
    with_sparse_read = attr.ib(default=False)
    # minimal density of a sparse read chunk
    sr_density_threshold = attr.ib(default=0.50)
    # minimal size of data we skip when performing sparse read
    sr_min_gap_size = attr.ib(default=262144)

    # are deltas encoded against arbitrary bases.
    generaldelta = attr.ib(default=False)


@attr.s()
class DeltaConfig(_Config):
    """Hold configuration values about how new deltas are computed

    Some attributes are duplicated from DataConfig to help keep each object
    self-contained.
    """

    # can deltas be encoded against arbitrary bases.
    general_delta = attr.ib(default=False)
    # Allow sparse writing of the revlog data
    sparse_revlog = attr.ib(default=False)
    # maximum length of a delta chain
    max_chain_len = attr.ib(default=None)
    # Maximum distance between delta chain base start and end
    max_deltachain_span = attr.ib(default=-1)
    # If `upper_bound_comp` is not None, this is the expected maximal gain from
    # compression for the data content.
    upper_bound_comp = attr.ib(default=None)
    # Should we try a delta against both parents
    delta_both_parents = attr.ib(default=True)
    # Test delta base candidate group by chunk of this maximal size.
    candidate_group_chunk_size = attr.ib(default=0)
    # Should we display debug information about delta computation
    debug_delta = attr.ib(default=False)
    # trust incoming delta by default
    lazy_delta = attr.ib(default=True)
    # trust the base of incoming delta by default
    lazy_delta_base = attr.ib(default=False)


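# Illustrative sketch of how these config objects compose (hypothetical
# values): `copy()` lets callers tweak one revlog's settings without
# mutating a shared default instance.
#
#   base = DeltaConfig(general_delta=True, max_chain_len=1000)
#   variant = base.copy()
#   variant.sparse_revlog = True  # `base` is unaffected

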
class revlog:
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.

    If `upperboundcomp` is not None, this is the expected maximal gain from
    compression for the data content.

    `concurrencychecker` is an optional function that receives 3 arguments: a
    file handle, a filename, and an expected position. It should check whether
    the current position in the file handle is valid, and log/warn/fail (by
    raising).

    See mercurial/revlogutils/constants.py for details about the content of an
    index entry.
    """

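    # A hypothetical `concurrencychecker` matching the contract in the
    # docstring above: compare the handle's position with the expected one
    # and fail loudly when another writer moved the file underneath us.
    #
    #   def checker(fh, fname, expected):
    #       if fh.tell() != expected:
    #           raise error.Abort(
    #               b'%s: expected offset %d, found %d'
    #               % (fname, expected, fh.tell())
    #           )
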
    _flagserrorclass = error.RevlogError

    @staticmethod
    def is_inline_index(header_bytes):
        """Determine if a revlog is inline from the initial bytes of the index"""
        header = INDEX_HEADER.unpack(header_bytes)[0]

        _format_flags = header & ~0xFFFF
        _format_version = header & 0xFFFF

        features = FEATURES_BY_VERSION[_format_version]
        return features[b'inline'](_format_flags)

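    # Sketch of the header split performed above, assuming a v1 inline
    # revlog whose first four index bytes are 0x00 0x01 0x00 0x01:
    #
    #   header = INDEX_HEADER.unpack(b'\x00\x01\x00\x01')[0]  # 0x00010001
    #   header & ~0xFFFF  # 0x00010000, i.e. FLAG_INLINE_DATA
    #   header & 0xFFFF   # 0x0001, i.e. REVLOGV1
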
    def __init__(
        self,
        opener,
        target,
        radix,
        postfix=None,  # only exists for `tmpcensored` now
        checkambig=False,
        mmaplargeindex=False,
        censorable=False,
        upperboundcomp=None,
        persistentnodemap=False,
        concurrencychecker=None,
        trypending=False,
        try_split=False,
        canonical_parent_order=True,
    ):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.

        `target`: a (KIND, ID) tuple that identifies the content stored in
        this revlog. It helps the rest of the code to understand what the
        revlog is about without having to resort to heuristics and index
        filename analysis. Note that this must be reliably set by normal
        code, but that test, debug, or performance measurement code might
        not set this to an accurate value.
        """
        self.upperboundcomp = upperboundcomp

        self.radix = radix

        self._docket_file = None
        self._indexfile = None
        self._datafile = None
        self._sidedatafile = None
        self._nodemap_file = None
        self.postfix = postfix
        self._trypending = trypending
        self._try_split = try_split
        self.opener = opener
        if persistentnodemap:
            self._nodemap_file = nodemaputil.get_nodemap_file(self)

        assert target[0] in ALL_KINDS
        assert len(target) == 2
        self.target = target
        if b'feature-config' in self.opener.options:
            self.feature_config = self.opener.options[b'feature-config'].copy()
        else:
            self.feature_config = FeatureConfig()
        self.feature_config.censorable = censorable
        self.feature_config.canonical_parent_order = canonical_parent_order
        if b'data-config' in self.opener.options:
            self.data_config = self.opener.options[b'data-config'].copy()
        else:
            self.data_config = DataConfig()
        self.data_config.check_ambig = checkambig
        self.data_config.mmap_large_index = mmaplargeindex
        if b'delta-config' in self.opener.options:
            self.delta_config = self.opener.options[b'delta-config'].copy()
        else:
            self.delta_config = DeltaConfig()

        # 3-tuple of (node, rev, text) for a raw revision.
        self._revisioncache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, b'')

        self.index = None
        self._docket = None
        self._nodemap_docket = None
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}

        # other optional features

        # Make copy of flag processors so each revlog instance can support
        # custom flags.
        self._flagprocessors = dict(flagutil.flagprocessors)

        # 3-tuple of file handles being used for active writing.
        self._writinghandles = None
        # prevent nesting of addgroup
        self._adding_group = None

        self._loadindex()

        self._concurrencychecker = concurrencychecker

    @property
    def _generaldelta(self):
        """temporary compatibility proxy"""
        return self.delta_config.general_delta

    @property
    def _checkambig(self):
        """temporary compatibility proxy"""
        return self.data_config.check_ambig

    @property
    def _mmaplargeindex(self):
        """temporary compatibility proxy"""
        return self.data_config.mmap_large_index

    @property
    def _censorable(self):
        """temporary compatibility proxy"""
        return self.feature_config.censorable

    @property
    def _chunkcachesize(self):
        """temporary compatibility proxy"""
        return self.data_config.chunk_cache_size

    @property
    def _maxchainlen(self):
        """temporary compatibility proxy"""
        return self.delta_config.max_chain_len

    @property
    def _deltabothparents(self):
        """temporary compatibility proxy"""
        return self.delta_config.delta_both_parents

    @property
    def _candidate_group_chunk_size(self):
        """temporary compatibility proxy"""
        return self.delta_config.candidate_group_chunk_size

    @property
    def _debug_delta(self):
        """temporary compatibility proxy"""
        return self.delta_config.debug_delta

    @property
    def _compengine(self):
        """temporary compatibility proxy"""
        return self.feature_config.compression_engine

    @property
    def _compengineopts(self):
        """temporary compatibility proxy"""
        return self.feature_config.compression_engine_options

    @property
    def _maxdeltachainspan(self):
        """temporary compatibility proxy"""
        return self.delta_config.max_deltachain_span

    @property
    def _withsparseread(self):
        """temporary compatibility proxy"""
        return self.data_config.with_sparse_read

    @property
    def _sparserevlog(self):
        """temporary compatibility proxy"""
        return self.delta_config.sparse_revlog

    @property
    def hassidedata(self):
        """temporary compatibility proxy"""
        return self.feature_config.has_side_data

    @property
    def _srdensitythreshold(self):
        """temporary compatibility proxy"""
        return self.data_config.sr_density_threshold

    @property
    def _srmingapsize(self):
        """temporary compatibility proxy"""
        return self.data_config.sr_min_gap_size

    @property
    def _compute_rank(self):
        """temporary compatibility proxy"""
        return self.feature_config.compute_rank

    @property
    def canonical_parent_order(self):
        """temporary compatibility proxy"""
        return self.feature_config.canonical_parent_order

    @property
    def _lazydelta(self):
        """temporary compatibility proxy"""
        return self.delta_config.lazy_delta

    @property
    def _lazydeltabase(self):
        """temporary compatibility proxy"""
        return self.delta_config.lazy_delta_base

    def _init_opts(self):
        """process options (from above/config) to set up the default revlog mode

        These values might be affected when actually reading on-disk information.

        The relevant values are returned for use in _loadindex().

        * newversionflags:
            version header to use if we need to create a new revlog

        * mmapindexthreshold:
            minimal index size at which to start using mmap

        * force_nodemap:
            force the usage of a "development" version of the nodemap code
        """
        opts = self.opener.options

        if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
            new_header = CHANGELOGV2
            compute_rank = opts.get(b'changelogv2.compute-rank', True)
            self.feature_config.compute_rank = compute_rank
        elif b'revlogv2' in opts:
            new_header = REVLOGV2
        elif b'revlogv1' in opts:
            new_header = REVLOGV1 | FLAG_INLINE_DATA
            if b'generaldelta' in opts:
                new_header |= FLAG_GENERALDELTA
        elif b'revlogv0' in self.opener.options:
            new_header = REVLOGV0
        else:
            new_header = REVLOG_DEFAULT_VERSION

        mmapindexthreshold = None
        if self._mmaplargeindex:
            mmapindexthreshold = self.data_config.mmap_index_threshold
        if self.feature_config.enable_ellipsis:
            self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

        # revlog v0 doesn't have flag processors
        for flag, processor in opts.get(b'flagprocessors', {}).items():
            flagutil.insertflagprocessor(flag, processor, self._flagprocessors)

        chunk_cache_size = self.data_config.chunk_cache_size
        if chunk_cache_size <= 0:
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not greater than 0')
                % chunk_cache_size
            )
        elif chunk_cache_size & (chunk_cache_size - 1):
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not a power of 2')
                % chunk_cache_size
            )
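        # The bitwise test above works because a power of two has exactly one
        # bit set, so e.g. 65536 & 65535 == 0 (accepted) while
        # 65537 & 65536 != 0 (rejected).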
        force_nodemap = opts.get(b'devel-force-nodemap', False)
        return new_header, mmapindexthreshold, force_nodemap

    def _get_data(self, filepath, mmap_threshold, size=None):
        """return a file content with or without mmap

        If the file is missing return the empty string"""
        try:
            with self.opener(filepath) as fp:
                if mmap_threshold is not None:
                    file_size = self.opener.fstat(fp).st_size
                    if file_size >= mmap_threshold:
                        if size is not None:
                            # avoid potential mmap crash
                            size = min(file_size, size)
                        # TODO: should .close() to release resources without
                        # relying on Python GC
                        if size is None:
                            return util.buffer(util.mmapread(fp))
                        else:
                            return util.buffer(util.mmapread(fp, size))
                if size is None:
                    return fp.read()
                else:
                    return fp.read(size)
        except FileNotFoundError:
            return b''

    def get_streams(self, max_linkrev, force_inline=False):
        """return a list of streams that represent this revlog

        This is used by stream-clone to do bytes-to-bytes copies of a repository.

        This streams data for all revisions that refer to a changelog revision up
        to `max_linkrev`.

        If `force_inline` is set, it enforces that the stream will represent an inline revlog.

        It returns a list of three-tuples:

            [
                (filename, bytes_stream, stream_size),
                …
            ]
        """
        n = len(self)
        index = self.index
        while n > 0:
            linkrev = index[n - 1][4]
            if linkrev < max_linkrev:
                break
            # note: this loop will rarely go through multiple iterations, since
            # it only traverses commits created during the current streaming
            # pull operation.
            #
            # If this becomes a problem, using a binary search should cap the
            # runtime of this.
            n = n - 1
        if n == 0:
            # no data to send
            return []
        index_size = n * index.entry_size
        data_size = self.end(n - 1)

        # XXX we might have been split (or stripped) since the object's
        # initialization. We need to close this race too, e.g. by having a way
        # to pre-open the files we feed to the revlog and never closing them
        # before we are done streaming.

        if self._inline:

            def get_stream():
                with self._indexfp() as fp:
                    yield None
                    size = index_size + data_size
                    if size <= 65536:
                        yield fp.read(size)
                    else:
                        yield from util.filechunkiter(fp, limit=size)

            inline_stream = get_stream()
            next(inline_stream)
            return [
                (self._indexfile, inline_stream, index_size + data_size),
            ]
        elif force_inline:

            def get_stream():
                with self.reading():
                    yield None

                    for rev in range(n):
                        idx = self.index.entry_binary(rev)
                        if rev == 0 and self._docket is None:
                            # re-inject the inline flag
                            header = self._format_flags
                            header |= self._format_version
                            header |= FLAG_INLINE_DATA
                            header = self.index.pack_header(header)
                            idx = header + idx
                        yield idx
                        yield self._getsegmentforrevs(rev, rev)[1]

            inline_stream = get_stream()
            next(inline_stream)
            return [
                (self._indexfile, inline_stream, index_size + data_size),
            ]
        else:

            def get_index_stream():
                with self._indexfp() as fp:
                    yield None
                    if index_size <= 65536:
                        yield fp.read(index_size)
                    else:
                        yield from util.filechunkiter(fp, limit=index_size)

            def get_data_stream():
                with self._datafp() as fp:
                    yield None
                    if data_size <= 65536:
                        yield fp.read(data_size)
                    else:
                        yield from util.filechunkiter(fp, limit=data_size)

            index_stream = get_index_stream()
            next(index_stream)
            data_stream = get_data_stream()
            next(data_stream)
            return [
                (self._datafile, data_stream, data_size),
                (self._indexfile, index_stream, index_size),
            ]

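    # Hypothetical consumer sketch for the streams returned above (destvfs
    # is a placeholder for the destination repository's vfs): a stream clone
    # copies each byte stream verbatim into its own file.
    #
    #   for name, stream, size in rl.get_streams(max_linkrev):
    #       with destvfs(name, b'wb') as fp:
    #           for chunk in stream:
    #               fp.write(chunk)
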
786 def _loadindex(self, docket=None):
786 def _loadindex(self, docket=None):
787
787
788 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
788 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
789
789
790 if self.postfix is not None:
790 if self.postfix is not None:
791 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
791 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
792 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
792 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
793 entry_point = b'%s.i.a' % self.radix
793 entry_point = b'%s.i.a' % self.radix
794 elif self._try_split and self.opener.exists(self._split_index_file):
794 elif self._try_split and self.opener.exists(self._split_index_file):
795 entry_point = self._split_index_file
795 entry_point = self._split_index_file
796 else:
796 else:
797 entry_point = b'%s.i' % self.radix
797 entry_point = b'%s.i' % self.radix
798
798
799 if docket is not None:
799 if docket is not None:
800 self._docket = docket
800 self._docket = docket
801 self._docket_file = entry_point
801 self._docket_file = entry_point
802 else:
802 else:
803 self._initempty = True
803 self._initempty = True
804 entry_data = self._get_data(entry_point, mmapindexthreshold)
804 entry_data = self._get_data(entry_point, mmapindexthreshold)
805 if len(entry_data) > 0:
805 if len(entry_data) > 0:
806 header = INDEX_HEADER.unpack(entry_data[:4])[0]
806 header = INDEX_HEADER.unpack(entry_data[:4])[0]
807 self._initempty = False
807 self._initempty = False
808 else:
808 else:
809 header = new_header
809 header = new_header
810
810
811 self._format_flags = header & ~0xFFFF
811 self._format_flags = header & ~0xFFFF
812 self._format_version = header & 0xFFFF
812 self._format_version = header & 0xFFFF
813
813
814 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
814 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
815 if supported_flags is None:
815 if supported_flags is None:
816 msg = _(b'unknown version (%d) in revlog %s')
816 msg = _(b'unknown version (%d) in revlog %s')
817 msg %= (self._format_version, self.display_id)
817 msg %= (self._format_version, self.display_id)
818 raise error.RevlogError(msg)
818 raise error.RevlogError(msg)
819 elif self._format_flags & ~supported_flags:
819 elif self._format_flags & ~supported_flags:
820 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
820 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
821 display_flag = self._format_flags >> 16
821 display_flag = self._format_flags >> 16
822 msg %= (display_flag, self._format_version, self.display_id)
822 msg %= (display_flag, self._format_version, self.display_id)
823 raise error.RevlogError(msg)
823 raise error.RevlogError(msg)
824
824
825 features = FEATURES_BY_VERSION[self._format_version]
825 features = FEATURES_BY_VERSION[self._format_version]
826 self._inline = features[b'inline'](self._format_flags)
826 self._inline = features[b'inline'](self._format_flags)
827 self.delta_config.general_delta = features[b'generaldelta'](
827 self.delta_config.general_delta = features[b'generaldelta'](
828 self._format_flags
828 self._format_flags
829 )
829 )
830 self.feature_config.has_side_data = features[b'sidedata']
830 self.feature_config.has_side_data = features[b'sidedata']
831
831
832 if not features[b'docket']:
832 if not features[b'docket']:
833 self._indexfile = entry_point
833 self._indexfile = entry_point
834 index_data = entry_data
834 index_data = entry_data
835 else:
835 else:
836 self._docket_file = entry_point
836 self._docket_file = entry_point
837 if self._initempty:
837 if self._initempty:
838 self._docket = docketutil.default_docket(self, header)
838 self._docket = docketutil.default_docket(self, header)
839 else:
839 else:
840 self._docket = docketutil.parse_docket(
840 self._docket = docketutil.parse_docket(
841 self, entry_data, use_pending=self._trypending
841 self, entry_data, use_pending=self._trypending
842 )
842 )
843
843
844 if self._docket is not None:
844 if self._docket is not None:
845 self._indexfile = self._docket.index_filepath()
845 self._indexfile = self._docket.index_filepath()
846 index_data = b''
846 index_data = b''
847 index_size = self._docket.index_end
847 index_size = self._docket.index_end
848 if index_size > 0:
848 if index_size > 0:
849 index_data = self._get_data(
849 index_data = self._get_data(
850 self._indexfile, mmapindexthreshold, size=index_size
850 self._indexfile, mmapindexthreshold, size=index_size
851 )
851 )
852 if len(index_data) < index_size:
852 if len(index_data) < index_size:
853 msg = _(b'too few index data for %s: got %d, expected %d')
853 msg = _(b'too few index data for %s: got %d, expected %d')
854 msg %= (self.display_id, len(index_data), index_size)
854 msg %= (self.display_id, len(index_data), index_size)
855 raise error.RevlogError(msg)
855 raise error.RevlogError(msg)
856
856
857 self._inline = False
857 self._inline = False
858 # generaldelta implied by version 2 revlogs.
858 # generaldelta implied by version 2 revlogs.
859 self.delta_config.general_delta = True
859 self.delta_config.general_delta = True
860 # the logic for persistent nodemap will be dealt with within the
860 # the logic for persistent nodemap will be dealt with within the
861 # main docket, so disable it for now.
861 # main docket, so disable it for now.
862 self._nodemap_file = None
862 self._nodemap_file = None
863
863
864 if self._docket is not None:
864 if self._docket is not None:
865 self._datafile = self._docket.data_filepath()
865 self._datafile = self._docket.data_filepath()
866 self._sidedatafile = self._docket.sidedata_filepath()
866 self._sidedatafile = self._docket.sidedata_filepath()
867 elif self.postfix is None:
867 elif self.postfix is None:
868 self._datafile = b'%s.d' % self.radix
868 self._datafile = b'%s.d' % self.radix
869 else:
869 else:
870 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
870 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
871
871
872 self.nodeconstants = sha1nodeconstants
        self.nodeconstants = sha1nodeconstants
        self.nullid = self.nodeconstants.nullid

        # sparse-revlog can't be on without general-delta (issue6056)
        if not self.delta_config.general_delta:
            self.delta_config.sparse_revlog = False

        self._storedeltachains = True

        devel_nodemap = (
            self._nodemap_file
            and force_nodemap
            and parse_index_v1_nodemap is not None
        )

        use_rust_index = False
        if rustrevlog is not None:
            if self._nodemap_file is not None:
                use_rust_index = True
            else:
                use_rust_index = self.opener.options.get(b'rust.index')

        self._parse_index = parse_index_v1
        if self._format_version == REVLOGV0:
            self._parse_index = revlogv0.parse_index_v0
        elif self._format_version == REVLOGV2:
            self._parse_index = parse_index_v2
        elif self._format_version == CHANGELOGV2:
            self._parse_index = parse_index_cl_v2
        elif devel_nodemap:
            self._parse_index = parse_index_v1_nodemap
        elif use_rust_index:
            self._parse_index = parse_index_v1_mixed
        try:
            d = self._parse_index(index_data, self._inline)
            index, chunkcache = d
            use_nodemap = (
                not self._inline
                and self._nodemap_file is not None
                and hasattr(index, 'update_nodemap_data')
            )
            if use_nodemap:
                nodemap_data = nodemaputil.persisted_data(self)
                if nodemap_data is not None:
                    docket = nodemap_data[0]
                    if (
                        len(d[0]) > docket.tip_rev
                        and d[0][docket.tip_rev][7] == docket.tip_node
                    ):
                        # no changelog tampering
                        self._nodemap_docket = docket
                        index.update_nodemap_data(*nodemap_data)
        except (ValueError, IndexError):
            raise error.RevlogError(
                _(b"index %s is corrupted") % self.display_id
            )
        self.index = index
        self._segmentfile = randomaccessfile.randomaccessfile(
            self.opener,
            (self._indexfile if self._inline else self._datafile),
            self._chunkcachesize,
            chunkcache,
        )
        self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
            self.opener,
            self._sidedatafile,
            self._chunkcachesize,
        )
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = util.lrucachedict(500)
        # revlog header -> revlog compressor
        self._decompressors = {}
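
    # Note on the nodemap loading above, as a hedged sketch (a restatement
    # of the check in the code, not additional API): the persisted nodemap
    # is only trusted when its docket still matches the freshly parsed
    # index, i.e. when
    #
    #     len(index) > docket.tip_rev
    #     index[docket.tip_rev][7] == docket.tip_node
    #
    # both hold. If either fails (e.g. the changelog was stripped or
    # rewritten), the on-disk nodemap is ignored and lookups fall back to
    # the plain index.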

    def get_revlog(self):
        """simple function to mirror the API of other not-really-revlog objects"""
        return self

    @util.propertycache
    def revlog_kind(self):
        return self.target[0]

    @util.propertycache
    def display_id(self):
        """The public facing "ID" of the revlog that we use in messages"""
        if self.revlog_kind == KIND_FILELOG:
            # Reference the file without the "data/" prefix, so it is familiar
            # to the user.
            return self.target[1]
        else:
            return self.radix

    def _get_decompressor(self, t):
        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor(self._compengineopts)
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(
                    _(b'unknown compression type %s') % binascii.hexlify(t)
                )
        return compressor

    @util.propertycache
    def _compressor(self):
        engine = util.compengines[self._compengine]
        return engine.revlogcompressor(self._compengineopts)

    @util.propertycache
    def _decompressor(self):
        """the default decompressor"""
        if self._docket is None:
            return None
        t = self._docket.default_compression_header
        c = self._get_decompressor(t)
        return c.decompress

    def _indexfp(self):
        """file object for the revlog's index file"""
        return self.opener(self._indexfile, mode=b"r")

    def __index_write_fp(self):
        # You should not use this directly; use `_writing` instead.
        try:
            f = self.opener(
                self._indexfile,
                mode=b"r+",
                checkambig=self.data_config.check_ambig,
            )
            if self._docket is None:
                f.seek(0, os.SEEK_END)
            else:
                f.seek(self._docket.index_end, os.SEEK_SET)
            return f
        except FileNotFoundError:
            return self.opener(
                self._indexfile,
                mode=b"w+",
                checkambig=self.data_config.check_ambig,
            )
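
    # A hedged illustration of what `check_ambig` buys us (the scenario is
    # illustrative, not taken from this module): if the index is rewritten
    # in place so that both its size and mtime end up unchanged, readers
    # that validate caches from (size, mtime) alone would keep stale data.
    # Opening with
    #
    #     self.opener(self._indexfile, mode=b"r+", checkambig=True)
    #
    # asks the vfs layer to detect and break such file stat ambiguity when
    # the handle is closed.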

    def __index_new_fp(self):
        # You should not use this unless you are upgrading from an inline
        # revlog.
        return self.opener(
            self._indexfile,
            mode=b"w",
            checkambig=self.data_config.check_ambig,
            atomictemp=True,
        )

    def _datafp(self, mode=b'r'):
        """file object for the revlog's data file"""
        return self.opener(self._datafile, mode=mode)

    @contextlib.contextmanager
    def _sidedatareadfp(self):
        """file object suitable to read sidedata"""
        if self._writinghandles:
            yield self._writinghandles[2]
        else:
            with self.opener(self._sidedatafile) as fp:
                yield fp
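
    # Minimal usage sketch for the context manager above (`rl`, `offset`
    # and `length` are hypothetical, e.g. taken from an index entry):
    #
    #     with rl._sidedatareadfp() as fp:
    #         fp.seek(offset)
    #         segment = fp.read(length)
    #
    # Reusing the open write handle, when there is one, keeps reads
    # coherent with sidedata appended in the same transaction.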

    def tiprev(self):
        return len(self.index) - 1

    def tip(self):
        return self.node(self.tiprev())

    def __contains__(self, rev):
        return 0 <= rev < len(self)

    def __len__(self):
        return len(self.index)

    def __iter__(self):
        return iter(range(len(self)))

    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        return storageutil.iterrevs(len(self), start=start, stop=stop)
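
    # Illustration (a sketch; `rl` is a hypothetical revlog with enough
    # revisions):
    #
    #     for r in rl.revs(start=2, stop=4):
    #         ...  # visits revs 2 through 4
    #
    # The bound semantics are those of storageutil.iterrevs, which treats
    # `stop` as inclusive and can also iterate backwards when start > stop.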

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def _candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such a flag processor can
        # alter the rawtext content that the delta will be based on, and two
        # clients could have the same revlog node with different flags (i.e.
        # different rawtext contents), making the delta incompatible.
        if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
            self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
        ):
            return False
        return True
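
    # Sketch of how a delta strategy might consult this guard (hypothetical
    # caller and helpers, not part of this module):
    #
    #     if rl._candelta(candidate_base, rev):
    #         delta = compute_delta(candidate_base, rev)
    #     else:
    #         store_full_text(rev)
    #
    # The bitmask test above only rejects pairs where either side carries a
    # flag from REVIDX_RAWTEXT_CHANGING_FLAGS, since those flags make the
    # stored rawtext client-dependent.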

    def update_caches(self, transaction):
        """update the on-disk cache

        If a transaction is passed, the update may be delayed to transaction
        commit."""
        if self._nodemap_file is not None:
            if transaction is None:
                nodemaputil.update_persistent_nodemap(self)
            else:
                nodemaputil.setup_persistent_nodemap(transaction, self)

    def clearcaches(self):
        """Clear in-memory caches"""
        self._revisioncache = None
        self._chainbasecache.clear()
        self._segmentfile.clear_cache()
        self._segmentfile_sidedata.clear_cache()
        self._pcache = {}
        self._nodemap_docket = None
        self.index.clearcaches()
        # The Python code is the one responsible for validating the docket;
        # we end up having to refresh it here.
        use_nodemap = (
            not self._inline
            and self._nodemap_file is not None
            and hasattr(self.index, 'update_nodemap_data')
        )
        if use_nodemap:
            nodemap_data = nodemaputil.persisted_data(self)
            if nodemap_data is not None:
                self._nodemap_docket = nodemap_data[0]
                self.index.update_nodemap_data(*nodemap_data)

    def rev(self, node):
        """return the revision number associated with a <nodeid>"""
        try:
            return self.index.rev(node)
        except TypeError:
            raise
        except error.RevlogError:
            # parsers.c radix tree lookup failed
            if (
                node == self.nodeconstants.wdirid
                or node in self.nodeconstants.wdirfilenodeids
            ):
                raise error.WdirUnsupported
            raise error.LookupError(node, self.display_id, _(b'no node'))
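
    # Round-trip sketch (assumes the hypothetical `rl` holds `node`):
    #
    #     r = rl.rev(node)        # error.LookupError if the node is absent
    #     assert rl.node(r) == node
    #
    # Working-directory pseudo-nodes are special-cased above and surface as
    # error.WdirUnsupported instead of a plain lookup failure.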

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)
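
    # Worked example of the packing described above (values are made up):
    #
    #     offset_flags = (1234 << 16) | 0x0001
    #     offset_flags >> 16       # -> 1234, what start() returns
    #     offset_flags & 0xFFFF    # -> 0x0001, what flags() returns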

    def sidedata_cut_off(self, rev):
        sd_cut_off = self.index[rev][8]
        if sd_cut_off != 0:
            return sd_cut_off
        # This is some annoying dance, because entries without sidedata
        # currently use 0 as their offset (instead of previous-offset +
        # previous-size).
        #
        # We should reconsider this sidedata → 0 sidedata_offset policy.
        # In the meantime, we need this.
        while 0 <= rev:
            e = self.index[rev]
            if e[9] != 0:
                return e[8] + e[9]
            rev -= 1
        return 0

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]

    def sidedata_length(self, rev):
        if not self.hassidedata:
            return 0
        return self.index[rev][9]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.rawdata(rev)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev))

    def fast_rank(self, rev):
        """Return the rank of a revision if already known, or None otherwise.

        The rank of a revision is the size of the sub-graph it defines as a
        head. Equivalently, the rank of a revision `r` is the size of the set
        `ancestors(r)`, `r` included.

        This method returns the rank retrieved from the revlog in constant
        time. It makes no attempt at computing unknown values for versions of
        the revlog which do not persist the rank.
        """
        rank = self.index[rev][ENTRY_RANK]
        if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
            return None
        if rev == nullrev:
            return 0  # convention
        return rank

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base
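
    # Illustration of the base-following loop above: with delta bases
    # index[r][3] forming the chain 5 -> 3 -> 2 -> 2 (a self-reference marks
    # a full snapshot), chainbase(5) walks 5, 3, 2, returns 2, and caches
    # that answer for rev 5.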

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

        if self.canonical_parent_order and entry[5] == nullrev:
            return entry[6], entry[5]
        else:
            return entry[5], entry[6]

    # fast parentrevs(rev) where rev isn't filtered
    _uncheckedparentrevs = parentrevs

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        # inline node() to avoid function call overhead
        if self.canonical_parent_order and d[5] == self.nullid:
            return i[d[6]][7], i[d[5]][7]
        else:
            return i[d[5]][7], i[d[6]][7]

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self.delta_config.general_delta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        generaldelta = self.delta_config.general_delta
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped
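
    # Sketch of consuming the result (hypothetical caller):
    #
    #     chain, stopped = rl._deltachain(rev)
    #     # `chain` is base-first; a reader starts from the snapshot at
    #     # chain[0] (or from `stoprev`'s text when `stopped` is True) and
    #     # applies each revision's delta in order to rebuild `rev`.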

    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse revision order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        # first, make sure start revisions aren't filtered
        revs = list(revs)
        checkrev = self.node
        for r in revs:
            checkrev(r)
        # and we're sure ancestors aren't filtered as well

        if rustancestor is not None and self.index.rust_ext_compat:
            lazyancestors = rustancestor.LazyAncestors
            arg = self.index
        else:
            lazyancestors = ancestor.lazyancestors
            arg = self._uncheckedparentrevs
        return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)

    def descendants(self, revs):
        return dagop.descendantrevs(revs, self.revs, self.parentrevs)

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset:
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]
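
    # Revset-flavoured sketch of the contract (hypothetical nodes c and h):
    #
    #     has, missing = rl.findcommonmissing(common=[c], heads=[h])
    #     # `has` acts as a lazy set over ::c (plus nullrev); `missing`
    #     # lists the nodes of (::h) - (::c) in revision (topological)
    #     # order.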

    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        if rustancestor is not None and self.index.rust_ext_compat:
            return rustancestor.MissingAncestors(self.index, common)
        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [self.nullid]  # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return (
                [self.node(r) for r in self],
                [self.nullid],
                list(self.heads()),
            )
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == self.nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n)  # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update(
                            [p for p in self.parents(n) if p != self.nullid]
                        )
                    elif n in heads:  # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [self.nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the descendants set with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in heads.items() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

    def headrevs(self, revs=None):
        if revs is None:
            try:
                return self.index.headrevs()
            except AttributeError:
                return self._headrevs()
        if rustdagop is not None and self.index.rust_ext_compat:
            return rustdagop.headrevs(self.index, revs)
        return dagop.headrevs(revs, self._uncheckedparentrevs)

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]
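
    # Tiny worked example for the marking pass above: on a linear history
    # 0 <- 1 <- 2, every rev is first marked as a potential head; visiting
    # rev 1 clears rev 0 and visiting rev 2 clears rev 1, leaving [2]. The
    # extra slot in `ishead` absorbs nullrev (-1) parents via negative
    # indexing.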

    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [self.nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = {self.rev(n) for n in stop or []}

        revs = dagop.headrevssubset(
            self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
        )

        return [self.node(rev) for rev in revs]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError):  # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        reachableroots is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return bool(self.reachableroots(a, [b], [a], includepath=False))
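
    # Sketch (revision numbers, not nodes):
    #
    #     rl.isancestorrev(2, 5)   # True iff rev 2 is reachable from rev 5
    #
    # The early returns cover nullrev (an ancestor of everything), a == b,
    # and a > b (an ancestor always has a smaller revision number); only
    # the remaining cases pay for a reachableroots() walk.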

    def reachableroots(self, minroot, heads, roots, includepath=False):
        """return (heads(::(<roots> and <roots>::<heads>)))

        If includepath is True, return (<roots>::<heads>)."""
        try:
            return self.index.reachableroots2(
                minroot, heads, roots, includepath
            )
        except AttributeError:
            return dagop._reachablerootspure(
                self.parentrevs, minroot, roots, heads, includepath
            )

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return self.nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == self.nodeconstants.nodelen:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node)  # quick search the index
                return node
            except error.LookupError:
                pass  # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if b"%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 2 * self.nodeconstants.nodelen:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (binascii.Error, error.LookupError):
                pass
1798
1802
    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be
        # full hashes
        maybewdir = self.nodeconstants.wdirhex.startswith(id)
        ambiguous = False
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    ambiguous = True
                else:
                    return partial
            elif maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            else:
                return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                ambiguous = True
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key is not hex
            pass
        if ambiguous:
            raise error.AmbiguousPrefixLookupError(
                id, self.display_id, _(b'ambiguous identifier')
            )

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            # hex(node)[:...]
            l = len(id) // 2 * 2  # grab an even number of digits
            try:
                # we're dropping the last digit, so let's check that it's hex,
                # to avoid the expensive computation below if it's not
                if len(id) % 2 > 0:
                    if not (id[-1] in hexdigits):
                        return None
                prefix = bin(id[:l])
            except binascii.Error:
                pass
            else:
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [
                    n for n in nl if hex(n).startswith(id) and self.hasnode(n)
                ]
                if self.nodeconstants.nullhex.startswith(id):
                    nl.append(self.nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.display_id, _(b'ambiguous identifier')
                    )
                if maybewdir:
                    raise error.WdirUnsupported
                return None

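    # Illustrative note: given a unique hex prefix such as b'1f0e', the fast
    # path above asks the C index's radix tree for the single matching node;
    # only when the radix tree reports an ambiguity (or is unavailable in
    # pure Python) does the slow path scan every index entry for nodes whose
    # hex form starts with the prefix. The call below is hypothetical:
    #
    #   rl._partialmatch(b'1f0e')  # -> binary node, None, or an error raised
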
    def lookup(self, id):
        """locate a node based on:
        - revision number or str(revision number)
        - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.display_id, _(b'no match found'))

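    # Hypothetical usage sketch (the instance name `rl` is assumed, not
    # defined in this module):
    #
    #   node = rl.lookup(b'10')      # revision number given as bytes
    #   node = rl.lookup(b'c3f1ca')  # unambiguous hex prefix
    #   rl.lookup(b'no-such-rev')    # raises LookupError when nothing matches
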
    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""

        def isvalid(prefix):
            try:
                matchednode = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if matchednode is None:
                raise error.LookupError(node, self.display_id, _(b'no node'))
            return True

        def maybewdir(prefix):
            return all(c == b'f' for c in pycompat.iterbytestr(prefix))

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != self.nodeconstants.wdirid:
                    raise error.LookupError(
                        node, self.display_id, _(b'no node')
                    )
            except AttributeError:
                # Fall through to pure code
                pass

        if node == self.nodeconstants.wdirid:
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, len(hexnode) + 1):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

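    # For example, ``shortest`` of a node whose hex form starts with b'ffff'
    # must return a prefix long enough to rule out the all-'f' wdir id, which
    # is why ``disambiguate`` skips prefixes made solely of b'f' characters.
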
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

    def _getsegmentforrevs(self, startrev, endrev):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self.index.entry_size
            end += (endrev + 1) * self.index.entry_size
        length = end - start

        return start, self._segmentfile.read_chunk(start, length)

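    # Worked example of the inline adjustment above (all values assumed): with
    # an entry_size of 64 bytes and a revision whose data starts at offset 100
    # in the conceptual .d stream, revision 3 of an inline revlog really lives
    # at 100 + (3 + 1) * 64 in the .i file, because the index entries for
    # revisions 0..3 are interleaved ahead of it.
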
    def _chunk(self, rev):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision.

        Returns a str holding uncompressed data for the requested revision.
        """
        compression_mode = self.index[rev][10]
        data = self._getsegmentforrevs(rev, rev)[1]
        if compression_mode == COMP_MODE_PLAIN:
            return data
        elif compression_mode == COMP_MODE_DEFAULT:
            return self._decompressor(data)
        elif compression_mode == COMP_MODE_INLINE:
            return self.decompress(data)
        else:
            msg = b'unknown compression mode %d'
            msg %= compression_mode
            raise error.RevlogError(msg)

    def _chunks(self, revs, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self.index.entry_size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(
                self, revs, targetsize=targetsize
            )

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev) for rev in revschunk]

            decomp = self.decompress
            # self._decompressor might be None, but will not be used in that case
            def_decomp = self._decompressor
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                comp_mode = self.index[rev][10]
                c = buffer(data, chunkstart - offset, chunklength)
                if comp_mode == COMP_MODE_PLAIN:
                    ladd(c)
                elif comp_mode == COMP_MODE_INLINE:
                    ladd(decomp(c))
                elif comp_mode == COMP_MODE_DEFAULT:
                    ladd(def_decomp(c))
                else:
                    msg = b'unknown compression mode %d'
                    msg %= comp_mode
                    raise error.RevlogError(msg)

        return l

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self.delta_config.general_delta:
            return base
        else:
            return rev - 1

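    # Sketch of the two storage layouts handled above (revision numbers are
    # made up): with general delta, entry[3] names an arbitrary base, so
    # deltaparent(7) may be 4; without it, deltas always chain to the
    # previous revision, so deltaparent(7) is 6 unless 7 is a full snapshot.
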
    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        if not self._sparserevlog:
            return self.deltaparent(rev) == nullrev
        elif hasattr(self.index, 'issnapshot'):
            # directly assign the method to cache the testing and access
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        while self.length(p1) == 0:
            b = self.deltaparent(p1)
            if b == p1:
                break
            p1 = b
        p2 = entry[6]
        while self.length(p2) == 0:
            b = self.deltaparent(p2)
            if b == p2:
                break
            p2 = b
        if base == p1 or base == p2:
            return False
        return self.issnapshot(base)

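    # Reading aid for the pure-Python fallback above: a revision is a
    # snapshot when it is stored as a full text or when its delta base is
    # *not* one of its parents (skipping empty parent revisions); a delta
    # against a parent is an ordinary delta, while intermediate snapshots
    # recurse through issnapshot(base).
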
    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))

    def revision(self, nodeorrev):
        """return an uncompressed revision of a given node or revision
        number.
        """
        return self._revisiondata(nodeorrev)

    def sidedata(self, nodeorrev):
        """a map of extra data related to the changeset but not part of the hash

        This function currently returns a dictionary. However, a more advanced
        mapping object will likely be used in the future for more
        efficient/lazy code.
        """
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
        else:
            rev = self.rev(nodeorrev)
        return self._sidedata(rev)

    def _revisiondata(self, nodeorrev, raw=False):
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        # fast path the special `nullid` rev
        if node == self.nullid:
            return b""

        # ``rawtext`` is the text as stored inside the revlog. Might be the
        # revision or might need to be processed to retrieve the revision.
        rev, rawtext, validated = self._rawtext(node, rev)

        if raw and validated:
            # if we don't want to process the raw text and that raw
            # text is cached, we can exit early.
            return rawtext
        if rev is None:
            rev = self.rev(node)
        # the revlog's flags for this revision
        # (they usually alter its state or content)
        flags = self.flags(rev)

        if validated and flags == REVIDX_DEFAULT_FLAGS:
            # no extra flags set, no flag processor runs, text = rawtext
            return rawtext

        if raw:
            validatehash = flagutil.processflagsraw(self, rawtext, flags)
            text = rawtext
        else:
            r = flagutil.processflagsread(self, rawtext, flags)
            text, validatehash = r
        if validatehash:
            self.checkhash(text, node, rev=rev)
        if not validated:
            self._revisioncache = (node, rev, rawtext)

        return text

    def _rawtext(self, node, rev):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)
        """

        # revision in the cache (could be useful to apply delta)
        cachedrev = None
        # An intermediate text to apply deltas to
        basetext = None

        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._revisioncache:
            if self._revisioncache[0] == node:
                return (rev, self._revisioncache[2], True)
            cachedrev = self._revisioncache[1]

        if rev is None:
            rev = self.rev(node)

        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            basetext = self._revisioncache[2]

        # drop cache to save memory, the caller is expected to
        # update self._revisioncache after validating the text
        self._revisioncache = None

        targetsize = None
        rawsize = self.index[rev][2]
        if 0 <= rawsize:
            targetsize = 4 * rawsize

        bins = self._chunks(chain, targetsize=targetsize)
        if basetext is None:
            basetext = bytes(bins[0])
            bins = bins[1:]

        rawtext = mdiff.patches(basetext, bins)
        del basetext  # let us have a chance to free memory early
        return (rev, rawtext, False)

    def _sidedata(self, rev):
        """Return the sidedata for a given revision number."""
        index_entry = self.index[rev]
        sidedata_offset = index_entry[8]
        sidedata_size = index_entry[9]

        if self._inline:
            sidedata_offset += self.index.entry_size * (1 + rev)
        if sidedata_size == 0:
            return {}

        if self._docket.sidedata_end < sidedata_offset + sidedata_size:
            filename = self._sidedatafile
            end = self._docket.sidedata_end
            offset = sidedata_offset
            length = sidedata_size
            m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
            raise error.RevlogError(m)

        comp_segment = self._segmentfile_sidedata.read_chunk(
            sidedata_offset, sidedata_size
        )

        comp = self.index[rev][11]
        if comp == COMP_MODE_PLAIN:
            segment = comp_segment
        elif comp == COMP_MODE_DEFAULT:
            segment = self._decompressor(comp_segment)
        elif comp == COMP_MODE_INLINE:
            segment = self.decompress(comp_segment)
        else:
            msg = b'unknown compression mode %d'
            msg %= comp
            raise error.RevlogError(msg)

        sidedata = sidedatautil.deserialize_sidedata(segment)
        return sidedata

    def rawdata(self, nodeorrev):
        """return the uncompressed raw data of a given node or revision number."""
        return self._revisiondata(nodeorrev, raw=True)

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if self._revisioncache and self._revisioncache[0] == node:
                    self._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(
                    _(b"integrity check failed on %s:%s")
                    % (self.display_id, pycompat.bytestr(revornode))
                )
        except error.RevlogError:
            if self._censorable and storageutil.iscensoredtext(text):
                raise error.CensoredNodeError(self.display_id, node, text)
            raise

    @property
    def _split_index_file(self):
        """the path where the index of an ongoing splitting operation is expected

        The file will only exist if a splitting operation is in progress, but
        it is always expected at the same location."""
        parts = self.radix.split(b'/')
        if len(parts) > 1:
            # adds a '-s' suffix to the ``data/`` or ``meta/`` base
            head = parts[0] + b'-s'
            mids = parts[1:-1]
            tail = parts[-1] + b'.i'
            pieces = [head] + mids + [tail]
            return b'/'.join(pieces)
        else:
            # the revlog is stored at the root of the store (changelog or
            # manifest), no risk of collision.
            return self.radix + b'.i.s'

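    # Path sketch (hypothetical radix): for ``data/foo.txt`` the split index
    # is expected at ``data-s/foo.txt.i``, while a store-root revlog such as
    # ``00changelog`` uses ``00changelog.i.s``.
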
    def _enforceinlinesize(self, tr, side_write=True):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        total_size = self.start(tiprev) + self.length(tiprev)
        if not self._inline or total_size < _maxinline:
            return

        troffset = tr.findoffset(self._indexfile)
        if troffset is None:
            raise error.RevlogError(
                _(b"%s not found in the transaction") % self._indexfile
            )
        if troffset:
            tr.addbackup(self._indexfile, for_offset=True)
        tr.add(self._datafile, 0)

        existing_handles = False
        if self._writinghandles is not None:
            existing_handles = True
            fp = self._writinghandles[0]
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None
            self._segmentfile.writing_handle = None
            # No need to deal with sidedata writing handle as it is only
            # relevant with revlog-v2 which is never inline, not reaching
            # this code
        if side_write:
            old_index_file_path = self._indexfile
            new_index_file_path = self._split_index_file
            opener = self.opener
            weak_self = weakref.ref(self)

            # the "split" index replaces the real index when the transaction
            # is finalized
            def finalize_callback(tr):
                opener.rename(
                    new_index_file_path,
                    old_index_file_path,
                    checkambig=True,
                )
                maybe_self = weak_self()
                if maybe_self is not None:
                    maybe_self._indexfile = old_index_file_path

            def abort_callback(tr):
                maybe_self = weak_self()
                if maybe_self is not None:
                    maybe_self._indexfile = old_index_file_path

            tr.registertmp(new_index_file_path)
            if self.target[1] is not None:
                callback_id = b'000-revlog-split-%d-%s' % self.target
            else:
                callback_id = b'000-revlog-split-%d' % self.target[0]
            tr.addfinalize(callback_id, finalize_callback)
            tr.addabort(callback_id, abort_callback)

        new_dfh = self._datafp(b'w+')
        new_dfh.truncate(0)  # drop any potentially existing data
        try:
            with self.reading():
                for r in self:
                    new_dfh.write(self._getsegmentforrevs(r, r)[1])
                new_dfh.flush()

            if side_write:
                self._indexfile = new_index_file_path
                with self.__index_new_fp() as fp:
                    self._format_flags &= ~FLAG_INLINE_DATA
                    self._inline = False
                    for i in self:
                        e = self.index.entry_binary(i)
                        if i == 0 and self._docket is None:
                            header = self._format_flags | self._format_version
                            header = self.index.pack_header(header)
                            e = header + e
                        fp.write(e)
                    if self._docket is not None:
                        self._docket.index_end = fp.tell()

                # If we don't use side-write, the temp file replaces the real
                # index when we exit the context manager

            nodemaputil.setup_persistent_nodemap(tr, self)
            self._segmentfile = randomaccessfile.randomaccessfile(
                self.opener,
                self._datafile,
                self._chunkcachesize,
            )

            if existing_handles:
                # switched from inline to conventional; reopen the index
                ifh = self.__index_write_fp()
                self._writinghandles = (ifh, new_dfh, None)
                self._segmentfile.writing_handle = new_dfh
                new_dfh = None
                # No need to deal with sidedata writing handle as it is only
                # relevant with revlog-v2 which is never inline, not reaching
                # this code
        finally:
            if new_dfh is not None:
                new_dfh.close()

    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored."""

    @contextlib.contextmanager
    def reading(self):
        """Context manager that keeps data and sidedata files open for reading"""
        if len(self.index) == 0:
            yield  # nothing to be read
        else:
            with self._segmentfile.reading():
                with self._segmentfile_sidedata.reading():
                    yield

    @contextlib.contextmanager
    def _writing(self, transaction):
        if self._trypending:
            msg = b'try to write in a `trypending` revlog: %s'
            msg %= self.display_id
            raise error.ProgrammingError(msg)
        if self._writinghandles is not None:
            yield
        else:
            ifh = dfh = sdfh = None
            try:
                r = len(self)
                # opening the data file.
                dsize = 0
                if r:
                    dsize = self.end(r - 1)
                dfh = None
                if not self._inline:
                    try:
                        dfh = self._datafp(b"r+")
                        if self._docket is None:
                            dfh.seek(0, os.SEEK_END)
                        else:
                            dfh.seek(self._docket.data_end, os.SEEK_SET)
                    except FileNotFoundError:
                        dfh = self._datafp(b"w+")
                        transaction.add(self._datafile, dsize)
                if self._sidedatafile is not None:
                    # revlog-v2 does not inline, help Pytype
                    assert dfh is not None
                    try:
                        sdfh = self.opener(self._sidedatafile, mode=b"r+")
                        sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)
                    except FileNotFoundError:
                        sdfh = self.opener(self._sidedatafile, mode=b"w+")
                        transaction.add(
                            self._sidedatafile, self._docket.sidedata_end
                        )

                # opening the index file.
                isize = r * self.index.entry_size
                ifh = self.__index_write_fp()
                if self._inline:
                    transaction.add(self._indexfile, dsize + isize)
                else:
                    transaction.add(self._indexfile, isize)
                # exposing all file handles for writing.
                self._writinghandles = (ifh, dfh, sdfh)
                self._segmentfile.writing_handle = ifh if self._inline else dfh
                self._segmentfile_sidedata.writing_handle = sdfh
                yield
                if self._docket is not None:
                    self._write_docket(transaction)
            finally:
                self._writinghandles = None
                self._segmentfile.writing_handle = None
                self._segmentfile_sidedata.writing_handle = None
                if dfh is not None:
                    dfh.close()
                if sdfh is not None:
                    sdfh.close()
                # closing the index file last to avoid exposing references
                # to potentially unflushed data content.
                if ifh is not None:
                    ifh.close()

    def _write_docket(self, transaction):
        """write the current docket on disk

        Exists as a method to help the changelog implement transaction logic.

        We could also imagine using the same transaction logic for all revlogs
        since dockets are cheap."""
        self._docket.write(transaction)

    def addrevision(
        self,
        text,
        transaction,
        link,
        p1,
        p2,
        cachedelta=None,
        node=None,
        flags=REVIDX_DEFAULT_FLAGS,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses might
            use a different hashing method (and override checkhash() in that
            case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
            multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(
                _(b"attempted to add linkrev -1 to %s") % self.display_id
            )

        if sidedata is None:
            sidedata = {}
        elif sidedata and not self.hassidedata:
            raise error.ProgrammingError(
                _(b"trying to add sidedata to a revlog that does not support them")
            )

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = flagutil.processflagswrite(self, text, flags)

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _(
                    b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
                )
                % (self.display_id, len(rawtext))
            )

        node = node or self.hash(rawtext, p1, p2)
        rev = self.index.get_rev(node)
        if rev is not None:
            return rev

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(
            rawtext,
            transaction,
            link,
            p1,
            p2,
            node,
            flags,
            cachedelta=cachedelta,
            deltacomputer=deltacomputer,
            sidedata=sidedata,
        )

    def addrawrevision(
        self,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        node,
        flags,
        cachedelta=None,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (e.g. received
        over the wire, or read from an external bundle).
        """
        with self._writing(transaction):
            return self._addrevision(
                node,
                rawtext,
                transaction,
                link,
                p1,
                p2,
                flags,
                cachedelta,
                deltacomputer=deltacomputer,
                sidedata=sidedata,
            )

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return b'', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return b'', compressed

        if data[0:1] == b'\0':
            return b'', data
        return b'u', data

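    # Header convention illustrated (a sketch, not an exhaustive list): the
    # empty header b'' means "the compressor's own header is embedded in the
    # returned data", b'u' marks data stored uncompressed, and a leading NUL
    # byte needs no b'u' marker since it cannot be confused with a header.
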
    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == b'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(
                    _(b'revlog decompress error: %s')
                    % stringutil.forcebytestr(e)
                )
        # '\0' is more common than 'u' so it goes first.
        elif t == b'\0':
            return data
        elif t == b'u':
            return util.buffer(data, 1)

        compressor = self._get_decompressor(t)

        return compressor.decompress(data)

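    # Dispatch summary for the header byte read above: b'x' is zlib, b'\0'
    # introduces raw data stored verbatim, b'u' prefixes uncompressed data
    # that must be sliced off, and any other byte is resolved through
    # ``_get_decompressor`` (e.g. zstd's header byte when that engine is
    # enabled).
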
2698 def _addrevision(
2702 def _addrevision(
2699 self,
2703 self,
2700 node,
2704 node,
2701 rawtext,
2705 rawtext,
2702 transaction,
2706 transaction,
2703 link,
2707 link,
2704 p1,
2708 p1,
2705 p2,
2709 p2,
2706 flags,
2710 flags,
2707 cachedelta,
2711 cachedelta,
2708 alwayscache=False,
2712 alwayscache=False,
2709 deltacomputer=None,
2713 deltacomputer=None,
2710 sidedata=None,
2714 sidedata=None,
2711 ):
2715 ):
2712 """internal function to add revisions to the log
2716 """internal function to add revisions to the log
2713
2717
2714 see addrevision for argument descriptions.
2718 see addrevision for argument descriptions.
2715
2719
2716 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2720 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2717
2721
2718 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2722 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2719 be used.
2723 be used.
2720
2724
2721 invariants:
2725 invariants:
2722 - rawtext is optional (can be None); if not set, cachedelta must be set.
2726 - rawtext is optional (can be None); if not set, cachedelta must be set.
2723 if both are set, they must correspond to each other.
2727 if both are set, they must correspond to each other.
2724 """
2728 """
2725 if node == self.nullid:
2729 if node == self.nullid:
2726 raise error.RevlogError(
2730 raise error.RevlogError(
2727 _(b"%s: attempt to add null revision") % self.display_id
2731 _(b"%s: attempt to add null revision") % self.display_id
2728 )
2732 )
2729 if (
2733 if (
2730 node == self.nodeconstants.wdirid
2734 node == self.nodeconstants.wdirid
2731 or node in self.nodeconstants.wdirfilenodeids
2735 or node in self.nodeconstants.wdirfilenodeids
2732 ):
2736 ):
2733 raise error.RevlogError(
2737 raise error.RevlogError(
2734 _(b"%s: attempt to add wdir revision") % self.display_id
2738 _(b"%s: attempt to add wdir revision") % self.display_id
2735 )
2739 )
2736 if self._writinghandles is None:
2740 if self._writinghandles is None:
2737 msg = b'adding revision outside `revlog._writing` context'
2741 msg = b'adding revision outside `revlog._writing` context'
2738 raise error.ProgrammingError(msg)
2742 raise error.ProgrammingError(msg)
2739
2743
2740 btext = [rawtext]
2744 btext = [rawtext]
2741
2745
2742 curr = len(self)
2746 curr = len(self)
2743 prev = curr - 1
2747 prev = curr - 1
2744
2748
2745 offset = self._get_data_offset(prev)
2749 offset = self._get_data_offset(prev)
2746
2750
2747 if self._concurrencychecker:
2751 if self._concurrencychecker:
2748 ifh, dfh, sdfh = self._writinghandles
2752 ifh, dfh, sdfh = self._writinghandles
2749 # XXX no checking for the sidedata file
2753 # XXX no checking for the sidedata file
2750 if self._inline:
2754 if self._inline:
2751 # offset is "as if" it were in the .d file, so we need to add on
2755 # offset is "as if" it were in the .d file, so we need to add on
2752 # the size of the entry metadata.
2756 # the size of the entry metadata.
2753 self._concurrencychecker(
2757 self._concurrencychecker(
2754 ifh, self._indexfile, offset + curr * self.index.entry_size
2758 ifh, self._indexfile, offset + curr * self.index.entry_size
2755 )
2759 )
2756 else:
2760 else:
2757 # Entries in the .i are a consistent size.
2761 # Entries in the .i are a consistent size.
2758 self._concurrencychecker(
2762 self._concurrencychecker(
2759 ifh, self._indexfile, curr * self.index.entry_size
2763 ifh, self._indexfile, curr * self.index.entry_size
2760 )
2764 )
2761 self._concurrencychecker(dfh, self._datafile, offset)
2765 self._concurrencychecker(dfh, self._datafile, offset)
2762
2766
2763 p1r, p2r = self.rev(p1), self.rev(p2)
2767 p1r, p2r = self.rev(p1), self.rev(p2)
2764
2768
2765 # full versions are inserted when the needed deltas
2769 # full versions are inserted when the needed deltas
2766 # become comparable to the uncompressed text
2770 # become comparable to the uncompressed text
2767 if rawtext is None:
2771 if rawtext is None:
2772 # need the rawtext size before it is changed by flag processors, which
2776 # need the rawtext size before it is changed by flag processors, which
2773 # is the non-raw size. use revlog explicitly to avoid filelog's extra
2777 # is the non-raw size. use revlog explicitly to avoid filelog's extra
2774 # logic that might remove metadata size.
2778 # logic that might remove metadata size.
2771 textlen = mdiff.patchedsize(
2775 textlen = mdiff.patchedsize(
2772 revlog.size(self, cachedelta[0]), cachedelta[1]
2776 revlog.size(self, cachedelta[0]), cachedelta[1]
2773 )
2777 )
2774 else:
2778 else:
2775 textlen = len(rawtext)
2779 textlen = len(rawtext)
2776
2780
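# Hedged sketch of what the mdiff.patchedsize() call above computes: a
# bdiff delta is a sequence of hunks, each a '>lll' header (start, end,
# newlength) followed by newlength bytes, so the patched size is derivable
# without building the text (patched_size is a hypothetical name):
#
#     def patched_size(base_size, delta):
#         size, pos = base_size, 0
#         while pos < len(delta):
#             start, end, newlen = struct.unpack(b'>lll', delta[pos:pos + 12])
#             size += newlen - (end - start)
#             pos += 12 + newlen
#         return size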
2777 if deltacomputer is None:
2781 if deltacomputer is None:
2778 write_debug = None
2782 write_debug = None
2779 if self._debug_delta:
2783 if self._debug_delta:
2780 write_debug = transaction._report
2784 write_debug = transaction._report
2781 deltacomputer = deltautil.deltacomputer(
2785 deltacomputer = deltautil.deltacomputer(
2782 self, write_debug=write_debug
2786 self, write_debug=write_debug
2783 )
2787 )
2784
2788
2785 if cachedelta is not None and len(cachedelta) == 2:
2789 if cachedelta is not None and len(cachedelta) == 2:
2786 # If the cached delta has no information about how it should be
2790 # If the cached delta has no information about how it should be
2787 # reused, add the default reuse instruction according to the
2791 # reused, add the default reuse instruction according to the
2788 # revlog's configuration.
2792 # revlog's configuration.
2789 if (
2793 if (
2790 self.delta_config.general_delta
2794 self.delta_config.general_delta
2791 and self.delta_config.lazy_delta_base
2795 and self.delta_config.lazy_delta_base
2792 ):
2796 ):
2793 delta_base_reuse = DELTA_BASE_REUSE_TRY
2797 delta_base_reuse = DELTA_BASE_REUSE_TRY
2794 else:
2798 else:
2795 delta_base_reuse = DELTA_BASE_REUSE_NO
2799 delta_base_reuse = DELTA_BASE_REUSE_NO
2796 cachedelta = (cachedelta[0], cachedelta[1], delta_base_reuse)
2800 cachedelta = (cachedelta[0], cachedelta[1], delta_base_reuse)
2797
2801
2798 revinfo = revlogutils.revisioninfo(
2802 revinfo = revlogutils.revisioninfo(
2799 node,
2803 node,
2800 p1,
2804 p1,
2801 p2,
2805 p2,
2802 btext,
2806 btext,
2803 textlen,
2807 textlen,
2804 cachedelta,
2808 cachedelta,
2805 flags,
2809 flags,
2806 )
2810 )
2807
2811
2808 deltainfo = deltacomputer.finddeltainfo(revinfo)
2812 deltainfo = deltacomputer.finddeltainfo(revinfo)
2809
2813
2810 compression_mode = COMP_MODE_INLINE
2814 compression_mode = COMP_MODE_INLINE
2811 if self._docket is not None:
2815 if self._docket is not None:
2812 default_comp = self._docket.default_compression_header
2816 default_comp = self._docket.default_compression_header
2813 r = deltautil.delta_compression(default_comp, deltainfo)
2817 r = deltautil.delta_compression(default_comp, deltainfo)
2814 compression_mode, deltainfo = r
2818 compression_mode, deltainfo = r
2815
2819
2816 sidedata_compression_mode = COMP_MODE_INLINE
2820 sidedata_compression_mode = COMP_MODE_INLINE
2817 if sidedata and self.hassidedata:
2821 if sidedata and self.hassidedata:
2818 sidedata_compression_mode = COMP_MODE_PLAIN
2822 sidedata_compression_mode = COMP_MODE_PLAIN
2819 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
2823 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
2820 sidedata_offset = self._docket.sidedata_end
2824 sidedata_offset = self._docket.sidedata_end
2821 h, comp_sidedata = self.compress(serialized_sidedata)
2825 h, comp_sidedata = self.compress(serialized_sidedata)
2822 if (
2826 if (
2823 h != b'u'
2827 h != b'u'
2824 and comp_sidedata[0:1] != b'\0'
2828 and comp_sidedata[0:1] != b'\0'
2825 and len(comp_sidedata) < len(serialized_sidedata)
2829 and len(comp_sidedata) < len(serialized_sidedata)
2826 ):
2830 ):
2827 assert not h
2831 assert not h
2828 if (
2832 if (
2829 comp_sidedata[0:1]
2833 comp_sidedata[0:1]
2830 == self._docket.default_compression_header
2834 == self._docket.default_compression_header
2831 ):
2835 ):
2832 sidedata_compression_mode = COMP_MODE_DEFAULT
2836 sidedata_compression_mode = COMP_MODE_DEFAULT
2833 serialized_sidedata = comp_sidedata
2837 serialized_sidedata = comp_sidedata
2834 else:
2838 else:
2835 sidedata_compression_mode = COMP_MODE_INLINE
2839 sidedata_compression_mode = COMP_MODE_INLINE
2836 serialized_sidedata = comp_sidedata
2840 serialized_sidedata = comp_sidedata
2837 else:
2841 else:
2838 serialized_sidedata = b""
2842 serialized_sidedata = b""
2839 # Don't store the offset if the sidedata is empty, that way
2843 # Don't store the offset if the sidedata is empty, that way
2840 # we can easily detect empty sidedata, which will be no different
2844 # we can easily detect empty sidedata, which will be no different
2841 # from sidedata we add manually.
2845 # from sidedata we add manually.
2842 sidedata_offset = 0
2846 sidedata_offset = 0
2843
2847
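# The sidedata compression decision above, restated as a hedged sketch
# (choose_sidedata_form is a hypothetical helper): keep the compressed
# form only when it is a strict size win and unambiguous to decode.
#
#     def choose_sidedata_form(raw, header, comp, default_header):
#         if header == b'u' or comp[0:1] == b'\0' or len(comp) >= len(raw):
#             return COMP_MODE_PLAIN, raw      # store uncompressed
#         if comp[0:1] == default_header:
#             return COMP_MODE_DEFAULT, comp   # header implied by the docket
#         return COMP_MODE_INLINE, comp        # header kept inside the data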
2844 rank = RANK_UNKNOWN
2848 rank = RANK_UNKNOWN
2845 if self._compute_rank:
2849 if self._compute_rank:
2846 if (p1r, p2r) == (nullrev, nullrev):
2850 if (p1r, p2r) == (nullrev, nullrev):
2847 rank = 1
2851 rank = 1
2848 elif p1r != nullrev and p2r == nullrev:
2852 elif p1r != nullrev and p2r == nullrev:
2849 rank = 1 + self.fast_rank(p1r)
2853 rank = 1 + self.fast_rank(p1r)
2850 elif p1r == nullrev and p2r != nullrev:
2854 elif p1r == nullrev and p2r != nullrev:
2851 rank = 1 + self.fast_rank(p2r)
2855 rank = 1 + self.fast_rank(p2r)
2852 else: # merge node
2856 else: # merge node
2853 if rustdagop is not None and self.index.rust_ext_compat:
2857 if rustdagop is not None and self.index.rust_ext_compat:
2854 rank = rustdagop.rank(self.index, p1r, p2r)
2858 rank = rustdagop.rank(self.index, p1r, p2r)
2855 else:
2859 else:
2856 pmin, pmax = sorted((p1r, p2r))
2860 pmin, pmax = sorted((p1r, p2r))
2857 rank = 1 + self.fast_rank(pmax)
2861 rank = 1 + self.fast_rank(pmax)
2858 rank += sum(1 for _ in self.findmissingrevs([pmax], [pmin]))
2862 rank += sum(1 for _ in self.findmissingrevs([pmax], [pmin]))
2859
2863
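# The rank computed above is the size of the revision's ancestor set,
# itself included. A hedged pure-Python sketch over a toy DAG (all names
# hypothetical, not revlog API):
#
#     def rank(node, parents):
#         seen, stack = set(), [node]
#         while stack:
#             n = stack.pop()
#             if n not in seen:
#                 seen.add(n)
#                 stack.extend(parents[n])
#         return len(seen)
#
#     parents = {'a': [], 'b': ['a'], 'c': ['a'], 'd': ['b', 'c']}
#     assert rank('d', parents) == 4  # 1 + rank of one parent + missing revs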
2860 e = revlogutils.entry(
2864 e = revlogutils.entry(
2861 flags=flags,
2865 flags=flags,
2862 data_offset=offset,
2866 data_offset=offset,
2863 data_compressed_length=deltainfo.deltalen,
2867 data_compressed_length=deltainfo.deltalen,
2864 data_uncompressed_length=textlen,
2868 data_uncompressed_length=textlen,
2865 data_compression_mode=compression_mode,
2869 data_compression_mode=compression_mode,
2866 data_delta_base=deltainfo.base,
2870 data_delta_base=deltainfo.base,
2867 link_rev=link,
2871 link_rev=link,
2868 parent_rev_1=p1r,
2872 parent_rev_1=p1r,
2869 parent_rev_2=p2r,
2873 parent_rev_2=p2r,
2870 node_id=node,
2874 node_id=node,
2871 sidedata_offset=sidedata_offset,
2875 sidedata_offset=sidedata_offset,
2872 sidedata_compressed_length=len(serialized_sidedata),
2876 sidedata_compressed_length=len(serialized_sidedata),
2873 sidedata_compression_mode=sidedata_compression_mode,
2877 sidedata_compression_mode=sidedata_compression_mode,
2874 rank=rank,
2878 rank=rank,
2875 )
2879 )
2876
2880
2877 self.index.append(e)
2881 self.index.append(e)
2878 entry = self.index.entry_binary(curr)
2882 entry = self.index.entry_binary(curr)
2879 if curr == 0 and self._docket is None:
2883 if curr == 0 and self._docket is None:
2880 header = self._format_flags | self._format_version
2884 header = self._format_flags | self._format_version
2881 header = self.index.pack_header(header)
2885 header = self.index.pack_header(header)
2882 entry = header + entry
2886 entry = header + entry
2883 self._writeentry(
2887 self._writeentry(
2884 transaction,
2888 transaction,
2885 entry,
2889 entry,
2886 deltainfo.data,
2890 deltainfo.data,
2887 link,
2891 link,
2888 offset,
2892 offset,
2889 serialized_sidedata,
2893 serialized_sidedata,
2890 sidedata_offset,
2894 sidedata_offset,
2891 )
2895 )
2892
2896
2893 rawtext = btext[0]
2897 rawtext = btext[0]
2894
2898
2895 if alwayscache and rawtext is None:
2899 if alwayscache and rawtext is None:
2896 rawtext = deltacomputer.buildtext(revinfo)
2900 rawtext = deltacomputer.buildtext(revinfo)
2897
2901
2898 if type(rawtext) == bytes: # only accept immutable objects
2902 if type(rawtext) == bytes: # only accept immutable objects
2899 self._revisioncache = (node, curr, rawtext)
2903 self._revisioncache = (node, curr, rawtext)
2900 self._chainbasecache[curr] = deltainfo.chainbase
2904 self._chainbasecache[curr] = deltainfo.chainbase
2901 return curr
2905 return curr
2902
2906
2903 def _get_data_offset(self, prev):
2907 def _get_data_offset(self, prev):
2904 """Returns the current offset in the (in-transaction) data file.
2908 """Returns the current offset in the (in-transaction) data file.
2905 Versions < 2 of the revlog can compute this in O(1); revlog v2 needs a docket
2909 Versions < 2 of the revlog can compute this in O(1); revlog v2 needs a docket
2906 file to store that information: since sidedata can be rewritten to the
2910 file to store that information: since sidedata can be rewritten to the
2907 end of the data file within a transaction, you can have cases where, for
2911 end of the data file within a transaction, you can have cases where, for
2908 example, rev `n` does not have sidedata while rev `n - 1` does, leading
2912 example, rev `n` does not have sidedata while rev `n - 1` does, leading
2909 to `n - 1`'s sidedata being written after `n`'s data.
2913 to `n - 1`'s sidedata being written after `n`'s data.
2910
2914
2911 TODO cache this in a docket file before getting out of experimental."""
2915 TODO cache this in a docket file before getting out of experimental."""
2912 if self._docket is None:
2916 if self._docket is None:
2913 return self.end(prev)
2917 return self.end(prev)
2914 else:
2918 else:
2915 return self._docket.data_end
2919 return self._docket.data_end
2916
2920
2917 def _writeentry(
2921 def _writeentry(
2918 self, transaction, entry, data, link, offset, sidedata, sidedata_offset
2922 self, transaction, entry, data, link, offset, sidedata, sidedata_offset
2919 ):
2923 ):
2920 # Files opened in a+ mode have inconsistent behavior on various
2924 # Files opened in a+ mode have inconsistent behavior on various
2921 # platforms. Windows requires that a file positioning call be made
2925 # platforms. Windows requires that a file positioning call be made
2922 # when the file handle transitions between reads and writes. See
2926 # when the file handle transitions between reads and writes. See
2923 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2927 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2924 # platforms, Python or the platform itself can be buggy. Some versions
2928 # platforms, Python or the platform itself can be buggy. Some versions
2925 # of Solaris have been observed to not append at the end of the file
2929 # of Solaris have been observed to not append at the end of the file
2926 # if the file was seeked to a position before the end. See issue4943.
2930 # if the file was seeked to a position before the end. See issue4943.
2927 #
2931 #
2928 # We work around this issue by inserting a seek() before writing.
2932 # We work around this issue by inserting a seek() before writing.
2929 # Note: This is likely not necessary on Python 3. However, because
2933 # Note: This is likely not necessary on Python 3. However, because
2930 # the file handle is reused for reads and may be seeked there, we need
2934 # the file handle is reused for reads and may be seeked there, we need
2931 # to be careful before changing this.
2935 # to be careful before changing this.
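# The workaround in miniature (hypothetical handle, not this method's
# code): never trust the implicit append position of an 'a+' handle,
# reposition explicitly before every write:
#
#     fh = open(path, 'a+b')
#     fh.seek(0, os.SEEK_END)  # explicit seek; see issue4943
#     fh.write(chunk)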
2932 if self._writinghandles is None:
2936 if self._writinghandles is None:
2933 msg = b'adding revision outside `revlog._writing` context'
2937 msg = b'adding revision outside `revlog._writing` context'
2934 raise error.ProgrammingError(msg)
2938 raise error.ProgrammingError(msg)
2935 ifh, dfh, sdfh = self._writinghandles
2939 ifh, dfh, sdfh = self._writinghandles
2936 if self._docket is None:
2940 if self._docket is None:
2937 ifh.seek(0, os.SEEK_END)
2941 ifh.seek(0, os.SEEK_END)
2938 else:
2942 else:
2939 ifh.seek(self._docket.index_end, os.SEEK_SET)
2943 ifh.seek(self._docket.index_end, os.SEEK_SET)
2940 if dfh:
2944 if dfh:
2941 if self._docket is None:
2945 if self._docket is None:
2942 dfh.seek(0, os.SEEK_END)
2946 dfh.seek(0, os.SEEK_END)
2943 else:
2947 else:
2944 dfh.seek(self._docket.data_end, os.SEEK_SET)
2948 dfh.seek(self._docket.data_end, os.SEEK_SET)
2945 if sdfh:
2949 if sdfh:
2946 sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)
2950 sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)
2947
2951
2948 curr = len(self) - 1
2952 curr = len(self) - 1
2949 if not self._inline:
2953 if not self._inline:
2950 transaction.add(self._datafile, offset)
2954 transaction.add(self._datafile, offset)
2951 if self._sidedatafile:
2955 if self._sidedatafile:
2952 transaction.add(self._sidedatafile, sidedata_offset)
2956 transaction.add(self._sidedatafile, sidedata_offset)
2953 transaction.add(self._indexfile, curr * len(entry))
2957 transaction.add(self._indexfile, curr * len(entry))
2954 if data[0]:
2958 if data[0]:
2955 dfh.write(data[0])
2959 dfh.write(data[0])
2956 dfh.write(data[1])
2960 dfh.write(data[1])
2957 if sidedata:
2961 if sidedata:
2958 sdfh.write(sidedata)
2962 sdfh.write(sidedata)
2959 ifh.write(entry)
2963 ifh.write(entry)
2960 else:
2964 else:
2961 offset += curr * self.index.entry_size
2965 offset += curr * self.index.entry_size
2962 transaction.add(self._indexfile, offset)
2966 transaction.add(self._indexfile, offset)
2963 ifh.write(entry)
2967 ifh.write(entry)
2964 ifh.write(data[0])
2968 ifh.write(data[0])
2965 ifh.write(data[1])
2969 ifh.write(data[1])
2966 assert not sidedata
2970 assert not sidedata
2967 self._enforceinlinesize(transaction)
2971 self._enforceinlinesize(transaction)
2968 if self._docket is not None:
2972 if self._docket is not None:
2969 # revlog-v2 always has 3 writing handles, help Pytype
2973 # revlog-v2 always has 3 writing handles, help Pytype
2970 wh1 = self._writinghandles[0]
2974 wh1 = self._writinghandles[0]
2971 wh2 = self._writinghandles[1]
2975 wh2 = self._writinghandles[1]
2972 wh3 = self._writinghandles[2]
2976 wh3 = self._writinghandles[2]
2973 assert wh1 is not None
2977 assert wh1 is not None
2974 assert wh2 is not None
2978 assert wh2 is not None
2975 assert wh3 is not None
2979 assert wh3 is not None
2976 self._docket.index_end = wh1.tell()
2980 self._docket.index_end = wh1.tell()
2977 self._docket.data_end = wh2.tell()
2981 self._docket.data_end = wh2.tell()
2978 self._docket.sidedata_end = wh3.tell()
2982 self._docket.sidedata_end = wh3.tell()
2979
2983
2980 nodemaputil.setup_persistent_nodemap(transaction, self)
2984 nodemaputil.setup_persistent_nodemap(transaction, self)
2981
2985
2982 def addgroup(
2986 def addgroup(
2983 self,
2987 self,
2984 deltas,
2988 deltas,
2985 linkmapper,
2989 linkmapper,
2986 transaction,
2990 transaction,
2987 alwayscache=False,
2991 alwayscache=False,
2988 addrevisioncb=None,
2992 addrevisioncb=None,
2989 duplicaterevisioncb=None,
2993 duplicaterevisioncb=None,
2990 debug_info=None,
2994 debug_info=None,
2991 delta_base_reuse_policy=None,
2995 delta_base_reuse_policy=None,
2992 ):
2996 ):
2993 """
2997 """
2994 add a delta group
2998 add a delta group
2995
2999
2996 Given a set of deltas, add them to the revision log. The
3000 Given a set of deltas, add them to the revision log. The
2997 first delta is against its parent, which should be in our
3001 first delta is against its parent, which should be in our
2998 log; the rest are against the previous delta.
3002 log; the rest are against the previous delta.
2999
3003
3000 If ``addrevisioncb`` is defined, it will be called with arguments of
3004 If ``addrevisioncb`` is defined, it will be called with arguments of
3001 this revlog and the node that was added.
3005 this revlog and the node that was added.
3002 """
3006 """
3003
3007
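# Sketch of the group shape the docstring describes, using the tuple
# layout unpacked in the loop below (n1, n2, the linknodes and the delta
# payloads are hypothetical; nullid stands for the null parent):
#
#     deltas = [
#         # (node, p1, p2, linknode, deltabase, delta, flags, sidedata)
#         (n1, p1, p2, ln1, p1, delta_vs_p1, 0, {}),      # first: vs. parent
#         (n2, n1, nullid, ln2, n1, delta_vs_n1, 0, {}),  # then: vs. previous
#     ]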
3004 if self._adding_group:
3008 if self._adding_group:
3005 raise error.ProgrammingError(b'cannot nest addgroup() calls')
3009 raise error.ProgrammingError(b'cannot nest addgroup() calls')
3006
3010
3007 # read the default delta-base reuse policy from revlog config if the
3011 # read the default delta-base reuse policy from revlog config if the
3008 # group did not specify one.
3012 # group did not specify one.
3009 if delta_base_reuse_policy is None:
3013 if delta_base_reuse_policy is None:
3010 if (
3014 if (
3011 self.delta_config.general_delta
3015 self.delta_config.general_delta
3012 and self.delta_config.lazy_delta_base
3016 and self.delta_config.lazy_delta_base
3013 ):
3017 ):
3014 delta_base_reuse_policy = DELTA_BASE_REUSE_TRY
3018 delta_base_reuse_policy = DELTA_BASE_REUSE_TRY
3015 else:
3019 else:
3016 delta_base_reuse_policy = DELTA_BASE_REUSE_NO
3020 delta_base_reuse_policy = DELTA_BASE_REUSE_NO
3017
3021
3018 self._adding_group = True
3022 self._adding_group = True
3019 empty = True
3023 empty = True
3020 try:
3024 try:
3021 with self._writing(transaction):
3025 with self._writing(transaction):
3022 write_debug = None
3026 write_debug = None
3023 if self._debug_delta:
3027 if self._debug_delta:
3024 write_debug = transaction._report
3028 write_debug = transaction._report
3025 deltacomputer = deltautil.deltacomputer(
3029 deltacomputer = deltautil.deltacomputer(
3026 self,
3030 self,
3027 write_debug=write_debug,
3031 write_debug=write_debug,
3028 debug_info=debug_info,
3032 debug_info=debug_info,
3029 )
3033 )
3030 # loop through our set of deltas
3034 # loop through our set of deltas
3031 for data in deltas:
3035 for data in deltas:
3032 (
3036 (
3033 node,
3037 node,
3034 p1,
3038 p1,
3035 p2,
3039 p2,
3036 linknode,
3040 linknode,
3037 deltabase,
3041 deltabase,
3038 delta,
3042 delta,
3039 flags,
3043 flags,
3040 sidedata,
3044 sidedata,
3041 ) = data
3045 ) = data
3042 link = linkmapper(linknode)
3046 link = linkmapper(linknode)
3043 flags = flags or REVIDX_DEFAULT_FLAGS
3047 flags = flags or REVIDX_DEFAULT_FLAGS
3044
3048
3045 rev = self.index.get_rev(node)
3049 rev = self.index.get_rev(node)
3046 if rev is not None:
3050 if rev is not None:
3047 # this can happen if two branches make the same change
3051 # this can happen if two branches make the same change
3048 self._nodeduplicatecallback(transaction, rev)
3052 self._nodeduplicatecallback(transaction, rev)
3049 if duplicaterevisioncb:
3053 if duplicaterevisioncb:
3050 duplicaterevisioncb(self, rev)
3054 duplicaterevisioncb(self, rev)
3051 empty = False
3055 empty = False
3052 continue
3056 continue
3053
3057
3054 for p in (p1, p2):
3058 for p in (p1, p2):
3055 if not self.index.has_node(p):
3059 if not self.index.has_node(p):
3056 raise error.LookupError(
3060 raise error.LookupError(
3057 p, self.radix, _(b'unknown parent')
3061 p, self.radix, _(b'unknown parent')
3058 )
3062 )
3059
3063
3060 if not self.index.has_node(deltabase):
3064 if not self.index.has_node(deltabase):
3061 raise error.LookupError(
3065 raise error.LookupError(
3062 deltabase, self.display_id, _(b'unknown delta base')
3066 deltabase, self.display_id, _(b'unknown delta base')
3063 )
3067 )
3064
3068
3065 baserev = self.rev(deltabase)
3069 baserev = self.rev(deltabase)
3066
3070
3067 if baserev != nullrev and self.iscensored(baserev):
3071 if baserev != nullrev and self.iscensored(baserev):
3068 # if base is censored, delta must be full replacement in a
3072 # if base is censored, delta must be full replacement in a
3069 # single patch operation
3073 # single patch operation
3070 hlen = struct.calcsize(b">lll")
3074 hlen = struct.calcsize(b">lll")
3071 oldlen = self.rawsize(baserev)
3075 oldlen = self.rawsize(baserev)
3072 newlen = len(delta) - hlen
3076 newlen = len(delta) - hlen
3073 if delta[:hlen] != mdiff.replacediffheader(
3077 if delta[:hlen] != mdiff.replacediffheader(
3074 oldlen, newlen
3078 oldlen, newlen
3075 ):
3079 ):
3076 raise error.CensoredBaseError(
3080 raise error.CensoredBaseError(
3077 self.display_id, self.node(baserev)
3081 self.display_id, self.node(baserev)
3078 )
3082 )
3079
3083
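# A "full replacement" delta over a censored base is a single hunk
# spanning the whole base, so its 12-byte header is
# struct.pack(b'>lll', 0, oldlen, newlen) -- which is what
# mdiff.replacediffheader(oldlen, newlen) builds for the comparison above.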
3080 if not flags and self._peek_iscensored(baserev, delta):
3084 if not flags and self._peek_iscensored(baserev, delta):
3081 flags |= REVIDX_ISCENSORED
3085 flags |= REVIDX_ISCENSORED
3082
3086
3083 # We assume consumers of addrevisioncb will want to retrieve
3087 # We assume consumers of addrevisioncb will want to retrieve
3084 # the added revision, which will require a call to
3088 # the added revision, which will require a call to
3085 # revision(). revision() will fast path if there is a cache
3089 # revision(). revision() will fast path if there is a cache
3086 # hit. So, we tell _addrevision() to always cache in this case.
3090 # hit. So, we tell _addrevision() to always cache in this case.
3087 # We're only using addgroup() in the context of changegroup
3091 # We're only using addgroup() in the context of changegroup
3088 # generation so the revision data can always be handled as raw
3092 # generation so the revision data can always be handled as raw
3089 # by the flagprocessor.
3093 # by the flagprocessor.
3090 rev = self._addrevision(
3094 rev = self._addrevision(
3091 node,
3095 node,
3092 None,
3096 None,
3093 transaction,
3097 transaction,
3094 link,
3098 link,
3095 p1,
3099 p1,
3096 p2,
3100 p2,
3097 flags,
3101 flags,
3098 (baserev, delta, delta_base_reuse_policy),
3102 (baserev, delta, delta_base_reuse_policy),
3099 alwayscache=alwayscache,
3103 alwayscache=alwayscache,
3100 deltacomputer=deltacomputer,
3104 deltacomputer=deltacomputer,
3101 sidedata=sidedata,
3105 sidedata=sidedata,
3102 )
3106 )
3103
3107
3104 if addrevisioncb:
3108 if addrevisioncb:
3105 addrevisioncb(self, rev)
3109 addrevisioncb(self, rev)
3106 empty = False
3110 empty = False
3107 finally:
3111 finally:
3108 self._adding_group = False
3112 self._adding_group = False
3109 return not empty
3113 return not empty
3110
3114
3111 def iscensored(self, rev):
3115 def iscensored(self, rev):
3112 """Check if a file revision is censored."""
3116 """Check if a file revision is censored."""
3113 if not self._censorable:
3117 if not self._censorable:
3114 return False
3118 return False
3115
3119
3116 return self.flags(rev) & REVIDX_ISCENSORED
3120 return self.flags(rev) & REVIDX_ISCENSORED
3117
3121
3118 def _peek_iscensored(self, baserev, delta):
3122 def _peek_iscensored(self, baserev, delta):
3119 """Quickly check if a delta produces a censored revision."""
3123 """Quickly check if a delta produces a censored revision."""
3120 if not self._censorable:
3124 if not self._censorable:
3121 return False
3125 return False
3122
3126
3123 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
3127 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
3124
3128
3125 def getstrippoint(self, minlink):
3129 def getstrippoint(self, minlink):
3126 """find the minimum rev that must be stripped to strip the linkrev
3130 """find the minimum rev that must be stripped to strip the linkrev
3127
3131
3128 Returns a tuple containing the minimum rev and a set of all revs that
3132 Returns a tuple containing the minimum rev and a set of all revs that
3129 have linkrevs that will be broken by this strip.
3133 have linkrevs that will be broken by this strip.
3130 """
3134 """
3131 return storageutil.resolvestripinfo(
3135 return storageutil.resolvestripinfo(
3132 minlink,
3136 minlink,
3133 len(self) - 1,
3137 len(self) - 1,
3134 self.headrevs(),
3138 self.headrevs(),
3135 self.linkrev,
3139 self.linkrev,
3136 self.parentrevs,
3140 self.parentrevs,
3137 )
3141 )
3138
3142
3139 def strip(self, minlink, transaction):
3143 def strip(self, minlink, transaction):
3140 """truncate the revlog on the first revision with a linkrev >= minlink
3144 """truncate the revlog on the first revision with a linkrev >= minlink
3141
3145
3142 This function is called when we're stripping revision minlink and
3146 This function is called when we're stripping revision minlink and
3143 its descendants from the repository.
3147 its descendants from the repository.
3144
3148
3145 We have to remove all revisions with linkrev >= minlink, because
3149 We have to remove all revisions with linkrev >= minlink, because
3146 the equivalent changelog revisions will be renumbered after the
3150 the equivalent changelog revisions will be renumbered after the
3147 strip.
3151 strip.
3148
3152
3149 So we truncate the revlog on the first of these revisions, and
3153 So we truncate the revlog on the first of these revisions, and
3150 trust that the caller has saved the revisions that shouldn't be
3154 trust that the caller has saved the revisions that shouldn't be
3151 removed and that it'll re-add them after this truncation.
3155 removed and that it'll re-add them after this truncation.
3152 """
3156 """
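# Worked example of the truncation offsets computed below, with
# hypothetical numbers: stripping at rev == 3 with entry_size == 64 and
# start(3) == 9000 truncates a split revlog to 9000 bytes of .d and
# 3 * 64 == 192 bytes of .i; an inline revlog keeps a single file,
# truncated to 9000 + 3 * 64 == 9192 bytes.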
3153 if len(self) == 0:
3157 if len(self) == 0:
3154 return
3158 return
3155
3159
3156 rev, _ = self.getstrippoint(minlink)
3160 rev, _ = self.getstrippoint(minlink)
3157 if rev == len(self):
3161 if rev == len(self):
3158 return
3162 return
3159
3163
3160 # first truncate the files on disk
3164 # first truncate the files on disk
3161 data_end = self.start(rev)
3165 data_end = self.start(rev)
3162 if not self._inline:
3166 if not self._inline:
3163 transaction.add(self._datafile, data_end)
3167 transaction.add(self._datafile, data_end)
3164 end = rev * self.index.entry_size
3168 end = rev * self.index.entry_size
3165 else:
3169 else:
3166 end = data_end + (rev * self.index.entry_size)
3170 end = data_end + (rev * self.index.entry_size)
3167
3171
3168 if self._sidedatafile:
3172 if self._sidedatafile:
3169 sidedata_end = self.sidedata_cut_off(rev)
3173 sidedata_end = self.sidedata_cut_off(rev)
3170 transaction.add(self._sidedatafile, sidedata_end)
3174 transaction.add(self._sidedatafile, sidedata_end)
3171
3175
3172 transaction.add(self._indexfile, end)
3176 transaction.add(self._indexfile, end)
3173 if self._docket is not None:
3177 if self._docket is not None:
3174 # XXX we could leverage the docket while stripping. However it is
3178 # XXX we could leverage the docket while stripping. However it is
3175 # not powerful enough at the time of this comment
3179 # not powerful enough at the time of this comment
3176 self._docket.index_end = end
3180 self._docket.index_end = end
3177 self._docket.data_end = data_end
3181 self._docket.data_end = data_end
3178 self._docket.sidedata_end = sidedata_end
3182 self._docket.sidedata_end = sidedata_end
3179 self._docket.write(transaction, stripping=True)
3183 self._docket.write(transaction, stripping=True)
3180
3184
3181 # then reset internal state in memory to forget those revisions
3185 # then reset internal state in memory to forget those revisions
3182 self._revisioncache = None
3186 self._revisioncache = None
3183 self._chaininfocache = util.lrucachedict(500)
3187 self._chaininfocache = util.lrucachedict(500)
3184 self._segmentfile.clear_cache()
3188 self._segmentfile.clear_cache()
3185 self._segmentfile_sidedata.clear_cache()
3189 self._segmentfile_sidedata.clear_cache()
3186
3190
3187 del self.index[rev:-1]
3191 del self.index[rev:-1]
3188
3192
3189 def checksize(self):
3193 def checksize(self):
3190 """Check size of index and data files
3194 """Check size of index and data files
3191
3195
3192 return a (dd, di) tuple.
3196 return a (dd, di) tuple.
3193 - dd: extra bytes for the "data" file
3197 - dd: extra bytes for the "data" file
3194 - di: extra bytes for the "index" file
3198 - di: extra bytes for the "index" file
3195
3199
3196 A healthy revlog will return (0, 0).
3200 A healthy revlog will return (0, 0).
3197 """
3201 """
3198 expected = 0
3202 expected = 0
3199 if len(self):
3203 if len(self):
3200 expected = max(0, self.end(len(self) - 1))
3204 expected = max(0, self.end(len(self) - 1))
3201
3205
3202 try:
3206 try:
3203 with self._datafp() as f:
3207 with self._datafp() as f:
3204 f.seek(0, io.SEEK_END)
3208 f.seek(0, io.SEEK_END)
3205 actual = f.tell()
3209 actual = f.tell()
3206 dd = actual - expected
3210 dd = actual - expected
3207 except FileNotFoundError:
3211 except FileNotFoundError:
3208 dd = 0
3212 dd = 0
3209
3213
3210 try:
3214 try:
3211 f = self.opener(self._indexfile)
3215 f = self.opener(self._indexfile)
3212 f.seek(0, io.SEEK_END)
3216 f.seek(0, io.SEEK_END)
3213 actual = f.tell()
3217 actual = f.tell()
3214 f.close()
3218 f.close()
3215 s = self.index.entry_size
3219 s = self.index.entry_size
3216 i = max(0, actual // s)
3220 i = max(0, actual // s)
3217 di = actual - (i * s)
3221 di = actual - (i * s)
3218 if self._inline:
3222 if self._inline:
3219 databytes = 0
3223 databytes = 0
3220 for r in self:
3224 for r in self:
3221 databytes += max(0, self.length(r))
3225 databytes += max(0, self.length(r))
3222 dd = 0
3226 dd = 0
3223 di = actual - len(self) * s - databytes
3227 di = actual - len(self) * s - databytes
3224 except FileNotFoundError:
3228 except FileNotFoundError:
3225 di = 0
3229 di = 0
3226
3230
3227 return (dd, di)
3231 return (dd, di)
3228
3232
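# Worked example of the (dd, di) arithmetic above, with hypothetical
# numbers: entry_size == 64 and a non-inline index file of 200 bytes give
# i = 200 // 64 == 3 complete entries and di = 200 - 3 * 64 == 8 stray
# bytes; a healthy revlog reports (0, 0).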
3229 def files(self):
3233 def files(self):
3230 res = [self._indexfile]
3234 res = [self._indexfile]
3231 if self._docket_file is None:
3235 if self._docket_file is None:
3232 if not self._inline:
3236 if not self._inline:
3233 res.append(self._datafile)
3237 res.append(self._datafile)
3234 else:
3238 else:
3235 res.append(self._docket_file)
3239 res.append(self._docket_file)
3236 res.extend(self._docket.old_index_filepaths(include_empty=False))
3240 res.extend(self._docket.old_index_filepaths(include_empty=False))
3237 if self._docket.data_end:
3241 if self._docket.data_end:
3238 res.append(self._datafile)
3242 res.append(self._datafile)
3239 res.extend(self._docket.old_data_filepaths(include_empty=False))
3243 res.extend(self._docket.old_data_filepaths(include_empty=False))
3240 if self._docket.sidedata_end:
3244 if self._docket.sidedata_end:
3241 res.append(self._sidedatafile)
3245 res.append(self._sidedatafile)
3242 res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
3246 res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
3243 return res
3247 return res
3244
3248
3245 def emitrevisions(
3249 def emitrevisions(
3246 self,
3250 self,
3247 nodes,
3251 nodes,
3248 nodesorder=None,
3252 nodesorder=None,
3249 revisiondata=False,
3253 revisiondata=False,
3250 assumehaveparentrevisions=False,
3254 assumehaveparentrevisions=False,
3251 deltamode=repository.CG_DELTAMODE_STD,
3255 deltamode=repository.CG_DELTAMODE_STD,
3252 sidedata_helpers=None,
3256 sidedata_helpers=None,
3253 debug_info=None,
3257 debug_info=None,
3254 ):
3258 ):
3255 if nodesorder not in (b'nodes', b'storage', b'linear', None):
3259 if nodesorder not in (b'nodes', b'storage', b'linear', None):
3256 raise error.ProgrammingError(
3260 raise error.ProgrammingError(
3257 b'unhandled value for nodesorder: %s' % nodesorder
3261 b'unhandled value for nodesorder: %s' % nodesorder
3258 )
3262 )
3259
3263
3260 if nodesorder is None and not self.delta_config.general_delta:
3264 if nodesorder is None and not self.delta_config.general_delta:
3261 nodesorder = b'storage'
3265 nodesorder = b'storage'
3262
3266
3263 if (
3267 if (
3264 not self._storedeltachains
3268 not self._storedeltachains
3265 and deltamode != repository.CG_DELTAMODE_PREV
3269 and deltamode != repository.CG_DELTAMODE_PREV
3266 ):
3270 ):
3267 deltamode = repository.CG_DELTAMODE_FULL
3271 deltamode = repository.CG_DELTAMODE_FULL
3268
3272
3269 return storageutil.emitrevisions(
3273 return storageutil.emitrevisions(
3270 self,
3274 self,
3271 nodes,
3275 nodes,
3272 nodesorder,
3276 nodesorder,
3273 revlogrevisiondelta,
3277 revlogrevisiondelta,
3274 deltaparentfn=self.deltaparent,
3278 deltaparentfn=self.deltaparent,
3275 candeltafn=self._candelta,
3279 candeltafn=self._candelta,
3276 rawsizefn=self.rawsize,
3280 rawsizefn=self.rawsize,
3277 revdifffn=self.revdiff,
3281 revdifffn=self.revdiff,
3278 flagsfn=self.flags,
3282 flagsfn=self.flags,
3279 deltamode=deltamode,
3283 deltamode=deltamode,
3280 revisiondata=revisiondata,
3284 revisiondata=revisiondata,
3281 assumehaveparentrevisions=assumehaveparentrevisions,
3285 assumehaveparentrevisions=assumehaveparentrevisions,
3282 sidedata_helpers=sidedata_helpers,
3286 sidedata_helpers=sidedata_helpers,
3283 debug_info=debug_info,
3287 debug_info=debug_info,
3284 )
3288 )
3285
3289
3286 DELTAREUSEALWAYS = b'always'
3290 DELTAREUSEALWAYS = b'always'
3287 DELTAREUSESAMEREVS = b'samerevs'
3291 DELTAREUSESAMEREVS = b'samerevs'
3288 DELTAREUSENEVER = b'never'
3292 DELTAREUSENEVER = b'never'
3289
3293
3290 DELTAREUSEFULLADD = b'fulladd'
3294 DELTAREUSEFULLADD = b'fulladd'
3291
3295
3292 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
3296 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
3293
3297
3294 def clone(
3298 def clone(
3295 self,
3299 self,
3296 tr,
3300 tr,
3297 destrevlog,
3301 destrevlog,
3298 addrevisioncb=None,
3302 addrevisioncb=None,
3299 deltareuse=DELTAREUSESAMEREVS,
3303 deltareuse=DELTAREUSESAMEREVS,
3300 forcedeltabothparents=None,
3304 forcedeltabothparents=None,
3301 sidedata_helpers=None,
3305 sidedata_helpers=None,
3302 ):
3306 ):
3303 """Copy this revlog to another, possibly with format changes.
3307 """Copy this revlog to another, possibly with format changes.
3304
3308
3305 The destination revlog will contain the same revisions and nodes.
3309 The destination revlog will contain the same revisions and nodes.
3306 However, it may not be bit-for-bit identical due to e.g. delta encoding
3310 However, it may not be bit-for-bit identical due to e.g. delta encoding
3307 differences.
3311 differences.
3308
3312
3309 The ``deltareuse`` argument controls how deltas from the existing revlog
3313 The ``deltareuse`` argument controls how deltas from the existing revlog
3310 are preserved in the destination revlog. The argument can have the
3314 are preserved in the destination revlog. The argument can have the
3311 following values:
3315 following values:
3312
3316
3313 DELTAREUSEALWAYS
3317 DELTAREUSEALWAYS
3314 Deltas will always be reused (if possible), even if the destination
3318 Deltas will always be reused (if possible), even if the destination
3315 revlog would not select the same revisions for the delta. This is the
3319 revlog would not select the same revisions for the delta. This is the
3316 fastest mode of operation.
3320 fastest mode of operation.
3317 DELTAREUSESAMEREVS
3321 DELTAREUSESAMEREVS
3318 Deltas will be reused if the destination revlog would pick the same
3322 Deltas will be reused if the destination revlog would pick the same
3319 revisions for the delta. This mode strikes a balance between speed
3323 revisions for the delta. This mode strikes a balance between speed
3320 and optimization.
3324 and optimization.
3321 DELTAREUSENEVER
3325 DELTAREUSENEVER
3322 Deltas will never be reused. This is the slowest mode of execution.
3326 Deltas will never be reused. This is the slowest mode of execution.
3323 This mode can be used to recompute deltas (e.g. if the diff/delta
3327 This mode can be used to recompute deltas (e.g. if the diff/delta
3324 algorithm changes).
3328 algorithm changes).
3325 DELTAREUSEFULLADD
3329 DELTAREUSEFULLADD
3326 Revisions will be re-added as if they were new content. This is
3330 Revisions will be re-added as if they were new content. This is
3327 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
3331 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
3328 e.g. large file detection and handling.
3332 e.g. large file detection and handling.
3329
3333
3330 Delta computation can be slow, so the choice of delta reuse policy can
3334 Delta computation can be slow, so the choice of delta reuse policy can
3331 significantly affect run time.
3335 significantly affect run time.
3332
3336
3333 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
3337 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
3334 two extremes. Deltas will be reused if they are appropriate. But if the
3338 two extremes. Deltas will be reused if they are appropriate. But if the
3335 delta could choose a better revision, it will do so. This means if you
3339 delta could choose a better revision, it will do so. This means if you
3336 are converting a non-generaldelta revlog to a generaldelta revlog,
3340 are converting a non-generaldelta revlog to a generaldelta revlog,
3337 deltas will be recomputed if the delta's parent isn't a parent of the
3341 deltas will be recomputed if the delta's parent isn't a parent of the
3338 revision.
3342 revision.
3339
3343
3340 In addition to the delta policy, the ``forcedeltabothparents``
3344 In addition to the delta policy, the ``forcedeltabothparents``
3345 argument controls whether to force computing deltas against both parents
3349 argument controls whether to force computing deltas against both parents
3346 for merges. If None, the destination revlog's current setting is used.
3350 for merges. If None, the destination revlog's current setting is used.
3343
3347
3344 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
3348 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
3345 `sidedata_helpers`.
3349 `sidedata_helpers`.
3346 """
3350 """
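# Hypothetical usage sketch of the policies documented above:
#
#     src.clone(tr, dst, deltareuse=src.DELTAREUSESAMEREVS)
#
# reuses a delta only when dst would pick the same base, while
#
#     src.clone(tr, dst, deltareuse=src.DELTAREUSENEVER)
#
# recomputes every delta, e.g. after a change of diff algorithm.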
3347 if deltareuse not in self.DELTAREUSEALL:
3351 if deltareuse not in self.DELTAREUSEALL:
3348 raise ValueError(
3352 raise ValueError(
3349 _(b'value for deltareuse invalid: %s') % deltareuse
3353 _(b'value for deltareuse invalid: %s') % deltareuse
3350 )
3354 )
3351
3355
3352 if len(destrevlog):
3356 if len(destrevlog):
3353 raise ValueError(_(b'destination revlog is not empty'))
3357 raise ValueError(_(b'destination revlog is not empty'))
3354
3358
3355 if getattr(self, 'filteredrevs', None):
3359 if getattr(self, 'filteredrevs', None):
3356 raise ValueError(_(b'source revlog has filtered revisions'))
3360 raise ValueError(_(b'source revlog has filtered revisions'))
3357 if getattr(destrevlog, 'filteredrevs', None):
3361 if getattr(destrevlog, 'filteredrevs', None):
3358 raise ValueError(_(b'destination revlog has filtered revisions'))
3362 raise ValueError(_(b'destination revlog has filtered revisions'))
3359
3363
3360 # lazydelta and lazydeltabase control whether to reuse a cached delta,
3364 # lazydelta and lazydeltabase control whether to reuse a cached delta,
3361 # if possible.
3365 # if possible.
3362 old_delta_config = destrevlog.delta_config
3366 old_delta_config = destrevlog.delta_config
3363 destrevlog.delta_config = destrevlog.delta_config.copy()
3367 destrevlog.delta_config = destrevlog.delta_config.copy()
3364
3368
3365 try:
3369 try:
3366 if deltareuse == self.DELTAREUSEALWAYS:
3370 if deltareuse == self.DELTAREUSEALWAYS:
3367 destrevlog.delta_config.lazy_delta_base = True
3371 destrevlog.delta_config.lazy_delta_base = True
3368 destrevlog.delta_config.lazy_delta = True
3372 destrevlog.delta_config.lazy_delta = True
3369 elif deltareuse == self.DELTAREUSESAMEREVS:
3373 elif deltareuse == self.DELTAREUSESAMEREVS:
3370 destrevlog.delta_config.lazy_delta_base = False
3374 destrevlog.delta_config.lazy_delta_base = False
3371 destrevlog.delta_config.lazy_delta = True
3375 destrevlog.delta_config.lazy_delta = True
3372 elif deltareuse == self.DELTAREUSENEVER:
3376 elif deltareuse == self.DELTAREUSENEVER:
3373 destrevlog.delta_config.lazy_delta_base = False
3377 destrevlog.delta_config.lazy_delta_base = False
3374 destrevlog.delta_config.lazy_delta = False
3378 destrevlog.delta_config.lazy_delta = False
3375
3379
3376 delta_both_parents = (
3380 delta_both_parents = (
3377 forcedeltabothparents or old_delta_config.delta_both_parents
3381 forcedeltabothparents or old_delta_config.delta_both_parents
3378 )
3382 )
3379 destrevlog.delta_config.delta_both_parents = delta_both_parents
3383 destrevlog.delta_config.delta_both_parents = delta_both_parents
3380
3384
3381 with self.reading():
3385 with self.reading():
3382 self._clone(
3386 self._clone(
3383 tr,
3387 tr,
3384 destrevlog,
3388 destrevlog,
3385 addrevisioncb,
3389 addrevisioncb,
3386 deltareuse,
3390 deltareuse,
3387 forcedeltabothparents,
3391 forcedeltabothparents,
3388 sidedata_helpers,
3392 sidedata_helpers,
3389 )
3393 )
3390
3394
3391 finally:
3395 finally:
3392 destrevlog.delta_config = old_delta_config
3396 destrevlog.delta_config = old_delta_config
3393
3397
3394 def _clone(
3398 def _clone(
3395 self,
3399 self,
3396 tr,
3400 tr,
3397 destrevlog,
3401 destrevlog,
3398 addrevisioncb,
3402 addrevisioncb,
3399 deltareuse,
3403 deltareuse,
3400 forcedeltabothparents,
3404 forcedeltabothparents,
3401 sidedata_helpers,
3405 sidedata_helpers,
3402 ):
3406 ):
3403 """perform the core duty of `revlog.clone` after parameter processing"""
3407 """perform the core duty of `revlog.clone` after parameter processing"""
3404 write_debug = None
3408 write_debug = None
3405 if self._debug_delta:
3409 if self._debug_delta:
3406 write_debug = tr._report
3410 write_debug = tr._report
3407 deltacomputer = deltautil.deltacomputer(
3411 deltacomputer = deltautil.deltacomputer(
3408 destrevlog,
3412 destrevlog,
3409 write_debug=write_debug,
3413 write_debug=write_debug,
3410 )
3414 )
3411 index = self.index
3415 index = self.index
3412 for rev in self:
3416 for rev in self:
3413 entry = index[rev]
3417 entry = index[rev]
3414
3418
3415 # Some classes override linkrev to take filtered revs into
3419 # Some classes override linkrev to take filtered revs into
3416 # account. Use raw entry from index.
3420 # account. Use raw entry from index.
3417 flags = entry[0] & 0xFFFF
3421 flags = entry[0] & 0xFFFF
3418 linkrev = entry[4]
3422 linkrev = entry[4]
3419 p1 = index[entry[5]][7]
3423 p1 = index[entry[5]][7]
3420 p2 = index[entry[6]][7]
3424 p2 = index[entry[6]][7]
3421 node = entry[7]
3425 node = entry[7]
3422
3426
3423 # (Possibly) reuse the delta from the revlog if allowed and
3427 # (Possibly) reuse the delta from the revlog if allowed and
3424 # the revlog chunk is a delta.
3428 # the revlog chunk is a delta.
3425 cachedelta = None
3429 cachedelta = None
3426 rawtext = None
3430 rawtext = None
3427 if deltareuse == self.DELTAREUSEFULLADD:
3431 if deltareuse == self.DELTAREUSEFULLADD:
3428 text = self._revisiondata(rev)
3432 text = self._revisiondata(rev)
3429 sidedata = self.sidedata(rev)
3433 sidedata = self.sidedata(rev)
3430
3434
3431 if sidedata_helpers is not None:
3435 if sidedata_helpers is not None:
3432 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3436 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3433 self, sidedata_helpers, sidedata, rev
3437 self, sidedata_helpers, sidedata, rev
3434 )
3438 )
3435 flags = flags | new_flags[0] & ~new_flags[1]
3439 flags = flags | new_flags[0] & ~new_flags[1]
3436
3440
3437 destrevlog.addrevision(
3441 destrevlog.addrevision(
3438 text,
3442 text,
3439 tr,
3443 tr,
3440 linkrev,
3444 linkrev,
3441 p1,
3445 p1,
3442 p2,
3446 p2,
3443 cachedelta=cachedelta,
3447 cachedelta=cachedelta,
3444 node=node,
3448 node=node,
3445 flags=flags,
3449 flags=flags,
3446 deltacomputer=deltacomputer,
3450 deltacomputer=deltacomputer,
3447 sidedata=sidedata,
3451 sidedata=sidedata,
3448 )
3452 )
3449 else:
3453 else:
3450 if destrevlog._lazydelta:
3454 if destrevlog._lazydelta:
3451 dp = self.deltaparent(rev)
3455 dp = self.deltaparent(rev)
3452 if dp != nullrev:
3456 if dp != nullrev:
3453 cachedelta = (dp, bytes(self._chunk(rev)))
3457 cachedelta = (dp, bytes(self._chunk(rev)))
3454
3458
3455 sidedata = None
3459 sidedata = None
3456 if not cachedelta:
3460 if not cachedelta:
3457 rawtext = self._revisiondata(rev)
3461 rawtext = self._revisiondata(rev)
3458 sidedata = self.sidedata(rev)
3462 sidedata = self.sidedata(rev)
3459 if sidedata is None:
3463 if sidedata is None:
3460 sidedata = self.sidedata(rev)
3464 sidedata = self.sidedata(rev)
3461
3465
3462 if sidedata_helpers is not None:
3466 if sidedata_helpers is not None:
3463 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3467 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3464 self, sidedata_helpers, sidedata, rev
3468 self, sidedata_helpers, sidedata, rev
3465 )
3469 )
3466 flags = flags | new_flags[0] & ~new_flags[1]
3470 flags = flags | new_flags[0] & ~new_flags[1]
3467
3471
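# Note on the flag update above: new_flags is a (to_add, to_remove) pair
# and '&' binds tighter than '|', so the expression evaluates as
# flags | (new_flags[0] & ~new_flags[1]) -- the remove mask filters the
# bits being added; bits already present in flags are never cleared.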
3468 with destrevlog._writing(tr):
3472 with destrevlog._writing(tr):
3469 destrevlog._addrevision(
3473 destrevlog._addrevision(
3470 node,
3474 node,
3471 rawtext,
3475 rawtext,
3472 tr,
3476 tr,
3473 linkrev,
3477 linkrev,
3474 p1,
3478 p1,
3475 p2,
3479 p2,
3476 flags,
3480 flags,
3477 cachedelta,
3481 cachedelta,
3478 deltacomputer=deltacomputer,
3482 deltacomputer=deltacomputer,
3479 sidedata=sidedata,
3483 sidedata=sidedata,
3480 )
3484 )
3481
3485
3482 if addrevisioncb:
3486 if addrevisioncb:
3483 addrevisioncb(self, rev, node)
3487 addrevisioncb(self, rev, node)
3484
3488
3485 def censorrevision(self, tr, censornode, tombstone=b''):
3489 def censorrevision(self, tr, censornode, tombstone=b''):
3486 if self._format_version == REVLOGV0:
3490 if self._format_version == REVLOGV0:
3487 raise error.RevlogError(
3491 raise error.RevlogError(
3488 _(b'cannot censor with version %d revlogs')
3492 _(b'cannot censor with version %d revlogs')
3489 % self._format_version
3493 % self._format_version
3490 )
3494 )
3491 elif self._format_version == REVLOGV1:
3495 elif self._format_version == REVLOGV1:
3492 rewrite.v1_censor(self, tr, censornode, tombstone)
3496 rewrite.v1_censor(self, tr, censornode, tombstone)
3493 else:
3497 else:
3494 rewrite.v2_censor(self, tr, censornode, tombstone)
3498 rewrite.v2_censor(self, tr, censornode, tombstone)
3495
3499
3496 def verifyintegrity(self, state):
3500 def verifyintegrity(self, state):
3497 """Verifies the integrity of the revlog.
3501 """Verifies the integrity of the revlog.
3498
3502
3499 Yields ``revlogproblem`` instances describing problems that are
3503 Yields ``revlogproblem`` instances describing problems that are
3500 found.
3504 found.
3501 """
3505 """
3502 dd, di = self.checksize()
3506 dd, di = self.checksize()
3503 if dd:
3507 if dd:
3504 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3508 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3505 if di:
3509 if di:
3506 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3510 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3507
3511
3508 version = self._format_version
3512 version = self._format_version
3509
3513
3510 # The verifier tells us what version revlog we should be.
3514 # The verifier tells us what version revlog we should be.
3511 if version != state[b'expectedversion']:
3515 if version != state[b'expectedversion']:
3512 yield revlogproblem(
3516 yield revlogproblem(
3513 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3517 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3514 % (self.display_id, version, state[b'expectedversion'])
3518 % (self.display_id, version, state[b'expectedversion'])
3515 )
3519 )
3516
3520
3517 state[b'skipread'] = set()
3521 state[b'skipread'] = set()
3518 state[b'safe_renamed'] = set()
3522 state[b'safe_renamed'] = set()
3519
3523
3520 for rev in self:
3524 for rev in self:
3521 node = self.node(rev)
3525 node = self.node(rev)
3522
3526
3523 # Verify contents. 4 cases to care about:
3527 # Verify contents. 4 cases to care about:
3524 #
3528 #
3525 # common: the most common case
3529 # common: the most common case
3526 # rename: with a rename
3530 # rename: with a rename
3527 # meta: file content starts with b'\1\n', the metadata
3531 # meta: file content starts with b'\1\n', the metadata
3528 # header defined in filelog.py, but without a rename
3532 # header defined in filelog.py, but without a rename
3529 # ext: content stored externally
3533 # ext: content stored externally
3530 #
3534 #
3531 # More formally, their differences are shown below:
3535 # More formally, their differences are shown below:
3532 #
3536 #
3533 # | common | rename | meta | ext
3537 # | common | rename | meta | ext
3534 # -------------------------------------------------------
3538 # -------------------------------------------------------
3535 # flags() | 0 | 0 | 0 | not 0
3539 # flags() | 0 | 0 | 0 | not 0
3536 # renamed() | False | True | False | ?
3540 # renamed() | False | True | False | ?
3537 # rawtext[0:2]=='\1\n'| False | True | True | ?
3541 # rawtext[0:2]=='\1\n'| False | True | True | ?
3538 #
3542 #
3539 # "rawtext" means the raw text stored in revlog data, which
3543 # "rawtext" means the raw text stored in revlog data, which
3540 # could be retrieved by "rawdata(rev)". "text"
3544 # could be retrieved by "rawdata(rev)". "text"
3541 # mentioned below is "revision(rev)".
3545 # mentioned below is "revision(rev)".
3542 #
3546 #
3543 # There are 3 different lengths stored physically:
3547 # There are 3 different lengths stored physically:
3544 # 1. L1: rawsize, stored in revlog index
3548 # 1. L1: rawsize, stored in revlog index
3545 # 2. L2: len(rawtext), stored in revlog data
3549 # 2. L2: len(rawtext), stored in revlog data
3546 # 3. L3: len(text), stored in revlog data if flags==0, or
3550 # 3. L3: len(text), stored in revlog data if flags==0, or
3547 # possibly somewhere else if flags!=0
3551 # possibly somewhere else if flags!=0
3548 #
3552 #
3549 # L1 should be equal to L2. L3 could be different from them.
3553 # L1 should be equal to L2. L3 could be different from them.
3550 # "text" may or may not affect commit hash depending on flag
3554 # "text" may or may not affect commit hash depending on flag
3551 # processors (see flagutil.addflagprocessor).
3555 # processors (see flagutil.addflagprocessor).
3552 #
3556 #
3553 # | common | rename | meta | ext
3557 # | common | rename | meta | ext
3554 # -------------------------------------------------
3558 # -------------------------------------------------
3555 # rawsize() | L1 | L1 | L1 | L1
3559 # rawsize() | L1 | L1 | L1 | L1
3556 # size() | L1 | L2-LM | L1(*) | L1 (?)
3560 # size() | L1 | L2-LM | L1(*) | L1 (?)
3557 # len(rawtext) | L2 | L2 | L2 | L2
3561 # len(rawtext) | L2 | L2 | L2 | L2
3558 # len(text) | L2 | L2 | L2 | L3
3562 # len(text) | L2 | L2 | L2 | L3
3559 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3563 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3560 #
3564 #
3561 # LM: length of metadata, depending on rawtext
3565 # LM: length of metadata, depending on rawtext
3562 # (*): not ideal, see comment in filelog.size
3566 # (*): not ideal, see comment in filelog.size
3563 # (?): could be "- len(meta)" if the resolved content has
3567 # (?): could be "- len(meta)" if the resolved content has
3564 # rename metadata
3568 # rename metadata
3565 #
3569 #
3566 # Checks needed to be done:
3570 # Checks needed to be done:
3567 # 1. length check: L1 == L2, in all cases.
3571 # 1. length check: L1 == L2, in all cases.
3568 # 2. hash check: depending on flag processor, we may need to
3572 # 2. hash check: depending on flag processor, we may need to
3569 # use either "text" (external), or "rawtext" (in revlog).
3573 # use either "text" (external), or "rawtext" (in revlog).
3570
3574
3571 try:
3575 try:
3572 skipflags = state.get(b'skipflags', 0)
3576 skipflags = state.get(b'skipflags', 0)
3573 if skipflags:
3577 if skipflags:
3574 skipflags &= self.flags(rev)
3578 skipflags &= self.flags(rev)
3575
3579
3576 _verify_revision(self, skipflags, state, node)
3580 _verify_revision(self, skipflags, state, node)
3577
3581
3578 l1 = self.rawsize(rev)
3582 l1 = self.rawsize(rev)
3579 l2 = len(self.rawdata(node))
3583 l2 = len(self.rawdata(node))
3580
3584
3581 if l1 != l2:
3585 if l1 != l2:
3582 yield revlogproblem(
3586 yield revlogproblem(
3583 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3587 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3584 node=node,
3588 node=node,
3585 )
3589 )
3586
3590
3587 except error.CensoredNodeError:
3591 except error.CensoredNodeError:
3588 if state[b'erroroncensored']:
3592 if state[b'erroroncensored']:
3589 yield revlogproblem(
3593 yield revlogproblem(
3590 error=_(b'censored file data'), node=node
3594 error=_(b'censored file data'), node=node
3591 )
3595 )
3592 state[b'skipread'].add(node)
3596 state[b'skipread'].add(node)
3593 except Exception as e:
3597 except Exception as e:
3594 yield revlogproblem(
3598 yield revlogproblem(
3595 error=_(b'unpacking %s: %s')
3599 error=_(b'unpacking %s: %s')
3596 % (short(node), stringutil.forcebytestr(e)),
3600 % (short(node), stringutil.forcebytestr(e)),
3597 node=node,
3601 node=node,
3598 )
3602 )
3599 state[b'skipread'].add(node)
3603 state[b'skipread'].add(node)
3600
3604
3601 def storageinfo(
3605 def storageinfo(
3602 self,
3606 self,
3603 exclusivefiles=False,
3607 exclusivefiles=False,
3604 sharedfiles=False,
3608 sharedfiles=False,
3605 revisionscount=False,
3609 revisionscount=False,
3606 trackedsize=False,
3610 trackedsize=False,
3607 storedsize=False,
3611 storedsize=False,
3608 ):
3612 ):
3609 d = {}
3613 d = {}
3610
3614
3611 if exclusivefiles:
3615 if exclusivefiles:
3612 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
3616 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
3613 if not self._inline:
3617 if not self._inline:
3614 d[b'exclusivefiles'].append((self.opener, self._datafile))
3618 d[b'exclusivefiles'].append((self.opener, self._datafile))
3615
3619
3616 if sharedfiles:
3620 if sharedfiles:
3617 d[b'sharedfiles'] = []
3621 d[b'sharedfiles'] = []
3618
3622
3619 if revisionscount:
3623 if revisionscount:
3620 d[b'revisionscount'] = len(self)
3624 d[b'revisionscount'] = len(self)
3621
3625
3622 if trackedsize:
3626 if trackedsize:
3623 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3627 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3624
3628
3625 if storedsize:
3629 if storedsize:
3626 d[b'storedsize'] = sum(
3630 d[b'storedsize'] = sum(
3627 self.opener.stat(path).st_size for path in self.files()
3631 self.opener.stat(path).st_size for path in self.files()
3628 )
3632 )
3629
3633
3630 return d
3634 return d
3631
3635
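# Hypothetical call illustrating the result shape (each key appears only
# when the matching argument is True):
#
#     info = rl.storageinfo(revisionscount=True, storedsize=True)
#     # -> {b'revisionscount': 42, b'storedsize': 123456}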
3632 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
3636 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
3633 if not self.hassidedata:
3637 if not self.hassidedata:
3634 return
3638 return
3635 # revlog formats with sidedata support do not support inline
3639 # revlog formats with sidedata support do not support inline
3636 assert not self._inline
3640 assert not self._inline
3637 if not helpers[1] and not helpers[2]:
3641 if not helpers[1] and not helpers[2]:
3638 # Nothing to generate or remove
3642 # Nothing to generate or remove
3639 return
3643 return
3640
3644
3641 new_entries = []
3645 new_entries = []
3642 # append the new sidedata
3646 # append the new sidedata
3643 with self._writing(transaction):
3647 with self._writing(transaction):
3644 ifh, dfh, sdfh = self._writinghandles
3648 ifh, dfh, sdfh = self._writinghandles
3645 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
3649 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
3646
3650
3647 current_offset = sdfh.tell()
3651 current_offset = sdfh.tell()
3648 for rev in range(startrev, endrev + 1):
3652 for rev in range(startrev, endrev + 1):
3649 entry = self.index[rev]
3653 entry = self.index[rev]
3650 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3654 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3651 store=self,
3655 store=self,
3652 sidedata_helpers=helpers,
3656 sidedata_helpers=helpers,
3653 sidedata={},
3657 sidedata={},
3654 rev=rev,
3658 rev=rev,
3655 )
3659 )
3656
3660
3657 serialized_sidedata = sidedatautil.serialize_sidedata(
3661 serialized_sidedata = sidedatautil.serialize_sidedata(
3658 new_sidedata
3662 new_sidedata
3659 )
3663 )
3660
3664
3661 sidedata_compression_mode = COMP_MODE_INLINE
3665 sidedata_compression_mode = COMP_MODE_INLINE
3662 if serialized_sidedata and self.hassidedata:
3666 if serialized_sidedata and self.hassidedata:
3663 sidedata_compression_mode = COMP_MODE_PLAIN
3667 sidedata_compression_mode = COMP_MODE_PLAIN
3664 h, comp_sidedata = self.compress(serialized_sidedata)
3668 h, comp_sidedata = self.compress(serialized_sidedata)
3665 if (
3669 if (
3666 h != b'u'
3670 h != b'u'
3667 and comp_sidedata[0] != b'\0'
3671 and comp_sidedata[0] != b'\0'
3668 and len(comp_sidedata) < len(serialized_sidedata)
3672 and len(comp_sidedata) < len(serialized_sidedata)
3669 ):
3673 ):
3670 assert not h
3674 assert not h
3671 if (
3675 if (
3672 comp_sidedata[0]
3676 comp_sidedata[0]
3673 == self._docket.default_compression_header
3677 == self._docket.default_compression_header
3674 ):
3678 ):
3675 sidedata_compression_mode = COMP_MODE_DEFAULT
3679 sidedata_compression_mode = COMP_MODE_DEFAULT
3676 serialized_sidedata = comp_sidedata
3680 serialized_sidedata = comp_sidedata
3677 else:
3681 else:
3678 sidedata_compression_mode = COMP_MODE_INLINE
3682 sidedata_compression_mode = COMP_MODE_INLINE
3679 serialized_sidedata = comp_sidedata
3683 serialized_sidedata = comp_sidedata
3680 if entry[8] != 0 or entry[9] != 0:
3684 if entry[8] != 0 or entry[9] != 0:
3681 # rewriting entries that already have sidedata is not
3685 # rewriting entries that already have sidedata is not
3682 # supported yet, because it introduces garbage data in the
3686 # supported yet, because it introduces garbage data in the
3683 # revlog.
3687 # revlog.
3684 msg = b"rewriting existing sidedata is not supported yet"
3688 msg = b"rewriting existing sidedata is not supported yet"
3685 raise error.Abort(msg)
3689 raise error.Abort(msg)
3686
3690
3687 # Apply (potential) flags to add and to remove after running
3691 # Apply (potential) flags to add and to remove after running
3688 # the sidedata helpers
3692 # the sidedata helpers
3689 new_offset_flags = entry[0] | flags[0] & ~flags[1]
3693 new_offset_flags = entry[0] | flags[0] & ~flags[1]
3690 entry_update = (
3694 entry_update = (
3691 current_offset,
3695 current_offset,
3692 len(serialized_sidedata),
3696 len(serialized_sidedata),
3693 new_offset_flags,
3697 new_offset_flags,
3694 sidedata_compression_mode,
3698 sidedata_compression_mode,
3695 )
3699 )
3696
3700
3697 # the sidedata computation might have move the file cursors around
3701 # the sidedata computation might have move the file cursors around
3698 sdfh.seek(current_offset, os.SEEK_SET)
3702 sdfh.seek(current_offset, os.SEEK_SET)
3699 sdfh.write(serialized_sidedata)
3703 sdfh.write(serialized_sidedata)
3700 new_entries.append(entry_update)
3704 new_entries.append(entry_update)
3701 current_offset += len(serialized_sidedata)
3705 current_offset += len(serialized_sidedata)
3702 self._docket.sidedata_end = sdfh.tell()
3706 self._docket.sidedata_end = sdfh.tell()
3703
3707
3704 # rewrite the new index entries
3708 # rewrite the new index entries
3705 ifh.seek(startrev * self.index.entry_size)
3709 ifh.seek(startrev * self.index.entry_size)
3706 for i, e in enumerate(new_entries):
3710 for i, e in enumerate(new_entries):
3707 rev = startrev + i
3711 rev = startrev + i
3708 self.index.replace_sidedata_info(rev, *e)
3712 self.index.replace_sidedata_info(rev, *e)
3709 packed = self.index.entry_binary(rev)
3713 packed = self.index.entry_binary(rev)
3710 if rev == 0 and self._docket is None:
3714 if rev == 0 and self._docket is None:
3711 header = self._format_flags | self._format_version
3715 header = self._format_flags | self._format_version
3712 header = self.index.pack_header(header)
3716 header = self.index.pack_header(header)
3713 packed = header + packed
3717 packed = header + packed
3714 ifh.write(packed)
3718 ifh.write(packed)
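A note on the `new_offset_flags = entry[0] | flags[0] & ~flags[1]` line above: the first index field packs the data offset and a 16-bit flag field into a single integer (the `>> 16` and `& 0xFFFF` accesses later in this series rely on the same layout). A minimal sketch with invented values; because of Python operator precedence, the `& ~` masks only the flags being added, it does not clear flags already present in the entry:

    OFFSET_SHIFT = 16  # low 16 bits hold the revision flags

    entry0 = (0x1234 << OFFSET_SHIFT) | 0b0010  # offset 0x1234, one flag set
    to_add, to_remove = 0b0101, 0b0001
    new_offset_flags = entry0 | to_add & ~to_remove
    assert new_offset_flags >> OFFSET_SHIFT == 0x1234  # offset untouched
    assert new_offset_flags & 0xFFFF == 0b0110  # 0b0100 added, 0b0010 kept
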
@@ -1,875 +1,875 b''
# censor code related to censoring revisions
# coding: utf8
#
# Copyright 2021 Pierre-Yves David <pierre-yves.david@octobus.net>
# Copyright 2015 Google, Inc <martinvonz@google.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

import binascii
import contextlib
import os
import struct

from ..node import (
    nullrev,
)
from .constants import (
    COMP_MODE_PLAIN,
    ENTRY_DATA_COMPRESSED_LENGTH,
    ENTRY_DATA_COMPRESSION_MODE,
    ENTRY_DATA_OFFSET,
    ENTRY_DATA_UNCOMPRESSED_LENGTH,
    ENTRY_DELTA_BASE,
    ENTRY_LINK_REV,
    ENTRY_NODE_ID,
    ENTRY_PARENT_1,
    ENTRY_PARENT_2,
    ENTRY_SIDEDATA_COMPRESSED_LENGTH,
    ENTRY_SIDEDATA_COMPRESSION_MODE,
    ENTRY_SIDEDATA_OFFSET,
    REVIDX_ISCENSORED,
    REVLOGV0,
    REVLOGV1,
)
from ..i18n import _

from .. import (
    error,
    mdiff,
    pycompat,
    revlogutils,
    util,
)
from ..utils import (
    storageutil,
)
from . import (
    constants,
    deltas,
)

def v1_censor(rl, tr, censornode, tombstone=b''):
    """censors a revision in a "version 1" revlog"""
    assert rl._format_version == constants.REVLOGV1, rl._format_version

    # avoid cycle
    from .. import revlog

    censorrev = rl.rev(censornode)
    tombstone = storageutil.packmeta({b'censored': tombstone}, b'')

    # Rewriting the revlog in place is hard. Our strategy for censoring is
    # to create a new revlog, copy all revisions to it, then replace the
    # revlogs on transaction close.
    #
    # This is a bit dangerous. We could easily have a mismatch of state.
    newrl = revlog.revlog(
        rl.opener,
        target=rl.target,
        radix=rl.radix,
        postfix=b'tmpcensored',
        censorable=True,
    )
    newrl._format_version = rl._format_version
    newrl._format_flags = rl._format_flags
    newrl.delta_config.general_delta = rl.delta_config.general_delta
    newrl._parse_index = rl._parse_index

    for rev in rl.revs():
        node = rl.node(rev)
        p1, p2 = rl.parents(node)

        if rev == censorrev:
            newrl.addrawrevision(
                tombstone,
                tr,
                rl.linkrev(censorrev),
                p1,
                p2,
                censornode,
                constants.REVIDX_ISCENSORED,
            )

            if newrl.deltaparent(rev) != nullrev:
                m = _(b'censored revision stored as delta; cannot censor')
                h = _(
                    b'censoring of revlogs is not fully implemented;'
                    b' please report this bug'
                )
                raise error.Abort(m, hint=h)
            continue

        if rl.iscensored(rev):
            if rl.deltaparent(rev) != nullrev:
                m = _(
                    b'cannot censor due to censored '
                    b'revision having delta stored'
                )
                raise error.Abort(m)
            rawtext = rl._chunk(rev)
        else:
            rawtext = rl.rawdata(rev)

        newrl.addrawrevision(
            rawtext, tr, rl.linkrev(rev), p1, p2, node, rl.flags(rev)
        )

    tr.addbackup(rl._indexfile, location=b'store')
    if not rl._inline:
        tr.addbackup(rl._datafile, location=b'store')

    rl.opener.rename(newrl._indexfile, rl._indexfile)
    if not rl._inline:
        rl.opener.rename(newrl._datafile, rl._datafile)

    rl.clearcaches()
    rl._loadindex()

def v2_censor(revlog, tr, censornode, tombstone=b''):
    """censors a revision in a "version 2" revlog"""
    assert revlog._format_version != REVLOGV0, revlog._format_version
    assert revlog._format_version != REVLOGV1, revlog._format_version

    censor_revs = {revlog.rev(censornode)}
    _rewrite_v2(revlog, tr, censor_revs, tombstone)


def _rewrite_v2(revlog, tr, censor_revs, tombstone=b''):
    """rewrite a revlog to censor some of its content

    General principle

    We create new revlog files (index/data/sidedata) to copy the content of
    the existing data without the censored data.

    We need to recompute a new delta for any revision that used a censored
    revision as its delta base. As the cumulative size of the new deltas may
    be large, we store them in a temporary file until they are stored in
    their final destination.

    All data before the censored data can be blindly copied. The rest needs
    to be copied as we go and the associated index entry needs adjustment.
    """
    assert revlog._format_version != REVLOGV0, revlog._format_version
    assert revlog._format_version != REVLOGV1, revlog._format_version

    old_index = revlog.index
    docket = revlog._docket

    tombstone = storageutil.packmeta({b'censored': tombstone}, b'')

    first_excl_rev = min(censor_revs)

    first_excl_entry = revlog.index[first_excl_rev]
    index_cutoff = revlog.index.entry_size * first_excl_rev
    data_cutoff = first_excl_entry[ENTRY_DATA_OFFSET] >> 16
    sidedata_cutoff = revlog.sidedata_cut_off(first_excl_rev)

    with pycompat.unnamedtempfile(mode=b"w+b") as tmp_storage:
        # rev → (new_base, data_start, data_end, compression_mode)
        rewritten_entries = _precompute_rewritten_delta(
            revlog,
            old_index,
            censor_revs,
            tmp_storage,
        )

        all_files = _setup_new_files(
            revlog,
            index_cutoff,
            data_cutoff,
            sidedata_cutoff,
        )

        # we don't need to open the old index file since its content already
        # exists in a usable form in `old_index`.
        with all_files() as open_files:
            (
                old_data_file,
                old_sidedata_file,
                new_index_file,
                new_data_file,
                new_sidedata_file,
            ) = open_files

            # write the censored revision(s) and all subsequent revisions
            for rev in range(first_excl_rev, len(old_index)):
                if rev in censor_revs:
                    _rewrite_censor(
                        revlog,
                        old_index,
                        open_files,
                        rev,
                        tombstone,
                    )
                else:
                    _rewrite_simple(
                        revlog,
                        old_index,
                        open_files,
                        rev,
                        rewritten_entries,
                        tmp_storage,
                    )
            docket.write(transaction=None, stripping=True)


def _precompute_rewritten_delta(
    revlog,
    old_index,
    excluded_revs,
    tmp_storage,
):
    """Compute new deltas for revisions whose delta is based on a revision
    that will not survive as is.

    Return a mapping: {rev → (new_base, data_start, data_end, compression_mode)}
    """
    dc = deltas.deltacomputer(revlog)
    rewritten_entries = {}
    first_excl_rev = min(excluded_revs)
    with revlog.reading():
        for rev in range(first_excl_rev, len(old_index)):
            if rev in excluded_revs:
                # this revision will be preserved as is, so we don't need to
                # consider recomputing a delta.
                continue
            entry = old_index[rev]
            if entry[ENTRY_DELTA_BASE] not in excluded_revs:
                continue
            # This is a revision that uses the censored revision as the base
            # for its delta. We need a new delta for it.
            if entry[ENTRY_DATA_UNCOMPRESSED_LENGTH] == 0:
                # this revision is empty, we can delta against nullrev
                rewritten_entries[rev] = (nullrev, 0, 0, COMP_MODE_PLAIN)
            else:
                text = revlog.rawdata(rev)
                info = revlogutils.revisioninfo(
                    node=entry[ENTRY_NODE_ID],
                    p1=revlog.node(entry[ENTRY_PARENT_1]),
                    p2=revlog.node(entry[ENTRY_PARENT_2]),
                    btext=[text],
                    textlen=len(text),
                    cachedelta=None,
                    flags=entry[ENTRY_DATA_OFFSET] & 0xFFFF,
                )
                d = dc.finddeltainfo(
                    info, excluded_bases=excluded_revs, target_rev=rev
                )
                default_comp = revlog._docket.default_compression_header
                comp_mode, d = deltas.delta_compression(default_comp, d)
                # using `tell` is a bit lazy, but we are not here for speed
                start = tmp_storage.tell()
                tmp_storage.write(d.data[1])
                end = tmp_storage.tell()
                rewritten_entries[rev] = (d.base, start, end, comp_mode)
    return rewritten_entries


def _setup_new_files(
    revlog,
    index_cutoff,
    data_cutoff,
    sidedata_cutoff,
):
    """
    return a context manager to open all the relevant files:
    - old_data_file,
    - old_sidedata_file,
    - new_index_file,
    - new_data_file,
    - new_sidedata_file,

    The old_index_file is not here because it is accessed through the
    `old_index` object of the caller function.
    """
    docket = revlog._docket
    old_index_filepath = revlog.opener.join(docket.index_filepath())
    old_data_filepath = revlog.opener.join(docket.data_filepath())
    old_sidedata_filepath = revlog.opener.join(docket.sidedata_filepath())

    new_index_filepath = revlog.opener.join(docket.new_index_file())
    new_data_filepath = revlog.opener.join(docket.new_data_file())
    new_sidedata_filepath = revlog.opener.join(docket.new_sidedata_file())

    util.copyfile(old_index_filepath, new_index_filepath, nb_bytes=index_cutoff)
    util.copyfile(old_data_filepath, new_data_filepath, nb_bytes=data_cutoff)
    util.copyfile(
        old_sidedata_filepath,
        new_sidedata_filepath,
        nb_bytes=sidedata_cutoff,
    )
    revlog.opener.register_file(docket.index_filepath())
    revlog.opener.register_file(docket.data_filepath())
    revlog.opener.register_file(docket.sidedata_filepath())

    docket.index_end = index_cutoff
    docket.data_end = data_cutoff
    docket.sidedata_end = sidedata_cutoff

    # reload the revlog internal information
    revlog.clearcaches()
    revlog._loadindex(docket=docket)

    @contextlib.contextmanager
    def all_files_opener():
        # hide the opening in a helper function to please check-code, black
        # and various Python versions at the same time
        with open(old_data_filepath, 'rb') as old_data_file:
            with open(old_sidedata_filepath, 'rb') as old_sidedata_file:
                with open(new_index_filepath, 'r+b') as new_index_file:
                    with open(new_data_filepath, 'r+b') as new_data_file:
                        with open(
                            new_sidedata_filepath, 'r+b'
                        ) as new_sidedata_file:
                            new_index_file.seek(0, os.SEEK_END)
                            assert new_index_file.tell() == index_cutoff
                            new_data_file.seek(0, os.SEEK_END)
                            assert new_data_file.tell() == data_cutoff
                            new_sidedata_file.seek(0, os.SEEK_END)
                            assert new_sidedata_file.tell() == sidedata_cutoff
                            yield (
                                old_data_file,
                                old_sidedata_file,
                                new_index_file,
                                new_data_file,
                                new_sidedata_file,
                            )

    return all_files_opener


def _rewrite_simple(
    revlog,
    old_index,
    all_files,
    rev,
    rewritten_entries,
    tmp_storage,
):
    """append a normal revision to the index after the rewritten one(s)"""
    (
        old_data_file,
        old_sidedata_file,
        new_index_file,
        new_data_file,
        new_sidedata_file,
    ) = all_files
    entry = old_index[rev]
    flags = entry[ENTRY_DATA_OFFSET] & 0xFFFF
    old_data_offset = entry[ENTRY_DATA_OFFSET] >> 16

    if rev not in rewritten_entries:
        old_data_file.seek(old_data_offset)
        new_data_size = entry[ENTRY_DATA_COMPRESSED_LENGTH]
        new_data = old_data_file.read(new_data_size)
        data_delta_base = entry[ENTRY_DELTA_BASE]
        d_comp_mode = entry[ENTRY_DATA_COMPRESSION_MODE]
    else:
        (
            data_delta_base,
            start,
            end,
            d_comp_mode,
        ) = rewritten_entries[rev]
        new_data_size = end - start
        tmp_storage.seek(start)
        new_data = tmp_storage.read(new_data_size)

    # It might be faster to group continuous read/write operations,
    # however, this is censor, an operation that is not focused on
    # stellar performance. So I have not written this optimisation yet.
    new_data_offset = new_data_file.tell()
    new_data_file.write(new_data)

    sidedata_size = entry[ENTRY_SIDEDATA_COMPRESSED_LENGTH]
    new_sidedata_offset = new_sidedata_file.tell()
    if 0 < sidedata_size:
        old_sidedata_offset = entry[ENTRY_SIDEDATA_OFFSET]
        old_sidedata_file.seek(old_sidedata_offset)
        new_sidedata = old_sidedata_file.read(sidedata_size)
        new_sidedata_file.write(new_sidedata)

    data_uncompressed_length = entry[ENTRY_DATA_UNCOMPRESSED_LENGTH]
    sd_com_mode = entry[ENTRY_SIDEDATA_COMPRESSION_MODE]
    assert data_delta_base <= rev, (data_delta_base, rev)

    new_entry = revlogutils.entry(
        flags=flags,
        data_offset=new_data_offset,
        data_compressed_length=new_data_size,
        data_uncompressed_length=data_uncompressed_length,
        data_delta_base=data_delta_base,
        link_rev=entry[ENTRY_LINK_REV],
        parent_rev_1=entry[ENTRY_PARENT_1],
        parent_rev_2=entry[ENTRY_PARENT_2],
        node_id=entry[ENTRY_NODE_ID],
        sidedata_offset=new_sidedata_offset,
        sidedata_compressed_length=sidedata_size,
        data_compression_mode=d_comp_mode,
        sidedata_compression_mode=sd_com_mode,
    )
    revlog.index.append(new_entry)
    entry_bin = revlog.index.entry_binary(rev)
    new_index_file.write(entry_bin)

    revlog._docket.index_end = new_index_file.tell()
    revlog._docket.data_end = new_data_file.tell()
    revlog._docket.sidedata_end = new_sidedata_file.tell()


def _rewrite_censor(
    revlog,
    old_index,
    all_files,
    rev,
    tombstone,
):
    """rewrite and append a censored revision"""
    (
        old_data_file,
        old_sidedata_file,
        new_index_file,
        new_data_file,
        new_sidedata_file,
    ) = all_files
    entry = old_index[rev]

    # XXX consider trying the default compression too
    new_data_size = len(tombstone)
    new_data_offset = new_data_file.tell()
    new_data_file.write(tombstone)

    # we are not adding any sidedata as they might leak info about the
    # censored version

    link_rev = entry[ENTRY_LINK_REV]

    p1 = entry[ENTRY_PARENT_1]
    p2 = entry[ENTRY_PARENT_2]

    new_entry = revlogutils.entry(
        flags=constants.REVIDX_ISCENSORED,
        data_offset=new_data_offset,
        data_compressed_length=new_data_size,
        data_uncompressed_length=new_data_size,
        data_delta_base=rev,
        link_rev=link_rev,
        parent_rev_1=p1,
        parent_rev_2=p2,
        node_id=entry[ENTRY_NODE_ID],
        sidedata_offset=0,
        sidedata_compressed_length=0,
        data_compression_mode=COMP_MODE_PLAIN,
        sidedata_compression_mode=COMP_MODE_PLAIN,
    )
    revlog.index.append(new_entry)
    entry_bin = revlog.index.entry_binary(rev)
    new_index_file.write(entry_bin)
    revlog._docket.index_end = new_index_file.tell()
    revlog._docket.data_end = new_data_file.tell()


def _get_filename_from_filelog_index(path):
    # Drop the extension and the `data/` prefix
    path_part = path.rsplit(b'.', 1)[0].split(b'/', 1)
    if len(path_part) < 2:
        msg = _(b"cannot recognize filelog from filename: '%s'")
        msg %= path
        raise error.Abort(msg)

    return path_part[1]


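A worked example of the parsing above (the path itself is invented): the `.i` extension and the `data/` prefix are dropped, and everything else is the tracked filename:

    >>> _get_filename_from_filelog_index(b'data/foo/bar.txt.i')
    b'foo/bar.txt'
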
def _filelog_from_filename(repo, path):
    """Returns the filelog for the given `path`. Stolen from `engine.py`"""

    from .. import filelog  # avoid cycle

    fl = filelog.filelog(repo.svfs, path)
    return fl


def _write_swapped_parents(repo, rl, rev, offset, fp):
    """Swaps p1 and p2 and overwrites the revlog entry for `rev` in `fp`"""
    from ..pure import parsers  # avoid cycle

    if repo._currentlock(repo._lockref) is None:
        # Let's be paranoid about it
        msg = "repo needs to be locked to rewrite parents"
        raise error.ProgrammingError(msg)

    index_format = parsers.IndexObject.index_format
    entry = rl.index[rev]
    new_entry = list(entry)
    new_entry[5], new_entry[6] = entry[6], entry[5]
    packed = index_format.pack(*new_entry[:8])
    fp.seek(offset)
    fp.write(packed)


def _reorder_filelog_parents(repo, fl, to_fix):
    """
    Swaps p1 and p2 for all `to_fix` revisions of filelog `fl` and writes the
    new version to disk, overwriting the old one with a rename.
    """
    from ..pure import parsers  # avoid cycle

    ui = repo.ui
    assert len(to_fix) > 0
    rl = fl._revlog
    if rl._format_version != constants.REVLOGV1:
        msg = "expected version 1 revlog, got version '%d'" % rl._format_version
        raise error.ProgrammingError(msg)

    index_file = rl._indexfile
    new_file_path = index_file + b'.tmp-parents-fix'
    repaired_msg = _(b"repaired revision %d of 'filelog %s'\n")

    with ui.uninterruptible():
        try:
            util.copyfile(
                rl.opener.join(index_file),
                rl.opener.join(new_file_path),
-                checkambig=rl._checkambig,
+                checkambig=rl.data_config.check_ambig,
            )

            with rl.opener(new_file_path, mode=b"r+") as fp:
                if rl._inline:
                    index = parsers.InlinedIndexObject(fp.read())
                    for rev in fl.revs():
                        if rev in to_fix:
                            offset = index._calculate_index(rev)
                            _write_swapped_parents(repo, rl, rev, offset, fp)
                            ui.write(repaired_msg % (rev, index_file))
                else:
                    index_format = parsers.IndexObject.index_format
                    for rev in to_fix:
                        offset = rev * index_format.size
                        _write_swapped_parents(repo, rl, rev, offset, fp)
                        ui.write(repaired_msg % (rev, index_file))

            rl.opener.rename(new_file_path, index_file)
            rl.clearcaches()
            rl._loadindex()
        finally:
            util.tryunlink(new_file_path)


def _is_revision_affected(fl, filerev, metadata_cache=None):
    full_text = lambda: fl._revlog.rawdata(filerev)
    parent_revs = lambda: fl._revlog.parentrevs(filerev)
    return _is_revision_affected_inner(
        full_text, parent_revs, filerev, metadata_cache
    )


def _is_revision_affected_inner(
    full_text,
    parents_revs,
    filerev,
    metadata_cache=None,
):
    """Mercurial currently (5.9rc0) gives `p1 == nullrev and p2 != nullrev` a
    special meaning, compared to the reverse, in the context of filelog-based
    copytracing. issue6528 exists because new code assumed that parent ordering
    didn't matter, so this detects if the revision contains metadata (since
    it's only used for filelog-based copytracing) and its parents are in the
    "wrong" order."""
    try:
        raw_text = full_text()
    except error.CensoredNodeError:
        # We don't care about censored nodes as they never carry metadata
        return False

    # raw text can be a `memoryview`, which doesn't implement `startswith`
    has_meta = bytes(raw_text[:2]) == b'\x01\n'
    if metadata_cache is not None:
        metadata_cache[filerev] = has_meta
    if has_meta:
        (p1, p2) = parents_revs()
        if p1 != nullrev and p2 == nullrev:
            return True
    return False


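The b'\x01\n' check above keys off the filelog metadata envelope: copy metadata is stored in front of the actual file data, delimited by b'\x01\n' on both sides. A hedged sketch of what such a raw text looks like (the `copy`/`copyrev` keys are the usual ones; all values invented for illustration):

    raw_text = (
        b'\x01\n'  # metadata start marker
        b'copy: src/old-name.txt\n'
        b'copyrev: 0123456789abcdef0123456789abcdef01234567\n'
        b'\x01\n'  # metadata end marker
        b'actual file content\n'
    )
    assert bytes(raw_text[:2]) == b'\x01\n'
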
def _is_revision_affected_fast(repo, fl, filerev, metadata_cache):
    rl = fl._revlog
    is_censored = lambda: rl.iscensored(filerev)
    delta_base = lambda: rl.deltaparent(filerev)
    delta = lambda: rl._chunk(filerev)
    full_text = lambda: rl.rawdata(filerev)
    parent_revs = lambda: rl.parentrevs(filerev)
    return _is_revision_affected_fast_inner(
        is_censored,
        delta_base,
        delta,
        full_text,
        parent_revs,
        filerev,
        metadata_cache,
    )


def _is_revision_affected_fast_inner(
    is_censored,
    delta_base,
    delta,
    full_text,
    parent_revs,
    filerev,
    metadata_cache,
):
    """Optimization fast-path for `_is_revision_affected`.

    `metadata_cache` is a dict of `{rev: has_metadata}` which allows any
    revision to check whether its base has metadata, saving the computation
    of the full text by looking only at the current delta.

    This optimization only works if the revisions are looked at in order."""

    if is_censored():
        # Censored revisions don't contain metadata, so they cannot be affected
        metadata_cache[filerev] = False
        return False

    p1, p2 = parent_revs()
    if p1 == nullrev or p2 != nullrev:
        return False

    delta_parent = delta_base()
    parent_has_metadata = metadata_cache.get(delta_parent)
    if parent_has_metadata is None:
        return _is_revision_affected_inner(
            full_text,
            parent_revs,
            filerev,
            metadata_cache,
        )

    chunk = delta()
    if not len(chunk):
        # No diff for this revision
        return parent_has_metadata

    header_length = 12
    if len(chunk) < header_length:
        raise error.Abort(_(b"patch cannot be decoded"))

    start, _end, _length = struct.unpack(b">lll", chunk[:header_length])

    if start < 2:  # len(b'\x01\n') == 2
        # This delta does *something* to the metadata marker (if any).
        # Check it the slow way.
        is_affected = _is_revision_affected_inner(
            full_text,
            parent_revs,
            filerev,
            metadata_cache,
        )
        return is_affected

    # The diff did not remove or add the metadata header, so the revision is
    # in the same situation as its parent
    metadata_cache[filerev] = parent_has_metadata
    return parent_has_metadata


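The 12-byte header unpacked above is the first hunk of a bdiff-style binary patch: three big-endian 32-bit integers (start, end, length of the replacement data), followed by the replacement bytes. A small sketch with invented values, showing a delta that strips a leading two-byte metadata marker and therefore trips the `start < 2` slow path:

    import struct

    chunk = struct.pack(b">lll", 0, 2, 0)  # replace bytes [0, 2) with nothing
    start, _end, _length = struct.unpack(b">lll", chunk[:12])
    assert start < 2  # touches the b'\x01\n' region: take the slow path
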
def _from_report(ui, repo, context, from_report, dry_run):
    """
    Fix the revisions given in the `from_report` file, but still check that
    the revisions are indeed affected, to prevent an unfortunate cyclic
    situation where we'd swap well-ordered parents again.

    See the doc for `debug_fix_issue6528` for the format documentation.
    """
    ui.write(_(b"loading report file '%s'\n") % from_report)

    with context(), open(from_report, mode='rb') as f:
        for line in f.read().split(b'\n'):
            if not line:
                continue
            filenodes, filename = line.split(b' ', 1)
            fl = _filelog_from_filename(repo, filename)
            to_fix = set(
                fl.rev(binascii.unhexlify(n)) for n in filenodes.split(b',')
            )
            excluded = set()

            for filerev in to_fix:
                if _is_revision_affected(fl, filerev):
                    msg = b"found affected revision %d for filelog '%s'\n"
                    ui.warn(msg % (filerev, filename))
                else:
                    msg = _(b"revision %s of file '%s' is not affected\n")
                    msg %= (binascii.hexlify(fl.node(filerev)), filename)
                    ui.warn(msg)
                    excluded.add(filerev)

            to_fix = to_fix - excluded
            if not to_fix:
                msg = _(b"no affected revisions were found for '%s'\n")
                ui.write(msg % filename)
                continue
            if not dry_run:
                _reorder_filelog_parents(repo, fl, sorted(to_fix))


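The report file read here (and written by `repair_issue6528` below) has one filelog per line: comma-separated hex filenodes, a single space, then the tracked path. A hypothetical line (hashes invented):

    0123456789abcdef0123456789abcdef01234567,89abcdef0123456789abcdef0123456789abcdef path/to/file.txt
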
def filter_delta_issue6528(revlog, deltas_iter):
    """filter incoming deltas to repair issue 6528 on the fly"""
    metadata_cache = {}

    deltacomputer = deltas.deltacomputer(revlog)

    for rev, d in enumerate(deltas_iter, len(revlog)):
        (
            node,
            p1_node,
            p2_node,
            linknode,
            deltabase,
            delta,
            flags,
            sidedata,
        ) = d

        if not revlog.index.has_node(deltabase):
            raise error.LookupError(
                deltabase, revlog.radix, _(b'unknown parent')
            )
        base_rev = revlog.rev(deltabase)
        if not revlog.index.has_node(p1_node):
            raise error.LookupError(p1_node, revlog.radix, _(b'unknown parent'))
        p1_rev = revlog.rev(p1_node)
        if not revlog.index.has_node(p2_node):
            raise error.LookupError(p2_node, revlog.radix, _(b'unknown parent'))
        p2_rev = revlog.rev(p2_node)

        is_censored = lambda: bool(flags & REVIDX_ISCENSORED)
        delta_base = lambda: base_rev
        parent_revs = lambda: (p1_rev, p2_rev)

        def full_text():
            # note: being able to reuse the full text computation in the
            # underlying addrevision would be useful, however this is a bit
            # too intrusive for the "quick" issue6528 fix we are writing
            # before the 5.8 release
            textlen = mdiff.patchedsize(revlog.size(base_rev), delta)

            revinfo = revlogutils.revisioninfo(
                node,
                p1_node,
                p2_node,
                [None],
                textlen,
                (base_rev, delta),
                flags,
            )
            return deltacomputer.buildtext(revinfo)

        is_affected = _is_revision_affected_fast_inner(
            is_censored,
            delta_base,
            lambda: delta,
            full_text,
            parent_revs,
            rev,
            metadata_cache,
        )
        if is_affected:
            d = (
                node,
                p2_node,
                p1_node,
                linknode,
                deltabase,
                delta,
                flags,
                sidedata,
            )
        yield d


def repair_issue6528(
    ui, repo, dry_run=False, to_report=None, from_report=None, paranoid=False
):
    @contextlib.contextmanager
    def context():
        if dry_run or to_report:  # No need for locking
            yield
        else:
            with repo.wlock(), repo.lock():
                yield

    if from_report:
        return _from_report(ui, repo, context, from_report, dry_run)

    report_entries = []

    with context():
        files = list(
            entry
            for entry in repo.store.data_entries()
            if entry.is_revlog and entry.is_filelog
        )

        progress = ui.makeprogress(
            _(b"looking for affected revisions"),
            unit=_(b"filelogs"),
            total=len(files),
        )
        found_nothing = True

        for entry in files:
            progress.increment()
            filename = entry.target_id
            fl = _filelog_from_filename(repo, entry.target_id)

            # Set of filerevs (or hex filenodes if `to_report`) that need fixing
            to_fix = set()
            metadata_cache = {}
            for filerev in fl.revs():
                affected = _is_revision_affected_fast(
                    repo, fl, filerev, metadata_cache
                )
                if paranoid:
                    slow = _is_revision_affected(fl, filerev)
                    if slow != affected:
                        msg = _(b"paranoid check failed for '%s' at node %s")
                        node = binascii.hexlify(fl.node(filerev))
                        raise error.Abort(msg % (filename, node))
                if affected:
                    msg = b"found affected revision %d for file '%s'\n"
                    ui.warn(msg % (filerev, filename))
                    found_nothing = False
                    if not dry_run:
                        if to_report:
                            to_fix.add(binascii.hexlify(fl.node(filerev)))
                        else:
                            to_fix.add(filerev)

            if to_fix:
                to_fix = sorted(to_fix)
                if to_report:
                    report_entries.append((filename, to_fix))
                else:
                    _reorder_filelog_parents(repo, fl, to_fix)

        if found_nothing:
            ui.write(_(b"no affected revisions were found\n"))

        if to_report and report_entries:
            with open(to_report, mode="wb") as f:
                for path, to_fix in report_entries:
                    f.write(b"%s %s\n" % (b",".join(to_fix), path))

        progress.complete()
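For context, this function backs Mercurial's issue6528 repair debug command; the exact command and flag spellings below are assumptions based on the keyword arguments above, not something this series shows. A cautious workflow would be a dry run first, then a real run (or `--to-report`/`--from-report` to separate detection from fixing):

    $ hg debug-repair-issue6528 --dry-run
    found affected revision 1 for file 'path/to/file.txt'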