revlog: overwrite revlog config through copy of the config object...
marmoute
r51923:177e7d6b default
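This changeset gives the revlog configuration classes (FeatureConfig, DataConfig, DeltaConfig) a shared copy() method so that code overriding revlog options can work on a private copy of the config object instead of mutating state shared with other revlogs. Below is a minimal sketch of the pattern, using the upstream attrs package rather than Mercurial's vendored one; the FeatureConfig fields are reduced to two, and the usage lines at the end are illustrative assumptions, not part of the changeset:

    import attr


    class _Config:
        def copy(self):
            # attrs keeps the fields in __dict__, so this re-creates the
            # instance with the same field values (a shallow copy)
            return self.__class__(**self.__dict__)


    @attr.s()
    class FeatureConfig(_Config):
        compression_engine = attr.ib(default=b'zlib')
        compression_engine_options = attr.ib(default=attr.Factory(dict))

        def copy(self):
            # the options dict is mutable and would still be shared after
            # a shallow copy, so deepen the copy by one level
            new = super().copy()
            new.compression_engine_options = (
                self.compression_engine_options.copy()
            )
            return new


    base = FeatureConfig()
    override = base.copy()
    override.compression_engine_options[b'zlib.level'] = 9
    assert base.compression_engine_options == {}  # original is untouched

The same shallow-copy-plus-override shape is what the diff below adds: _Config provides the generic copy(), and FeatureConfig overrides it only to deep-copy its one mutable attribute.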
@@ -1,3731 +1,3740 @@
 # revlog.py - storage back-end for mercurial
 # coding: utf8
 #
 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
 #
 # This software may be used and distributed according to the terms of the
 # GNU General Public License version 2 or any later version.
 
 """Storage back-end for Mercurial.
 
 This provides efficient delta storage with O(1) retrieve and append
 and O(changes) merge between branches.
 """
 
 
 import binascii
 import collections
 import contextlib
 import io
 import os
 import struct
 import weakref
 import zlib
 
 # import stuff from node for others to import from revlog
 from .node import (
     bin,
     hex,
     nullrev,
     sha1nodeconstants,
     short,
     wdirrev,
 )
 from .i18n import _
 from .revlogutils.constants import (
     ALL_KINDS,
     CHANGELOGV2,
     COMP_MODE_DEFAULT,
     COMP_MODE_INLINE,
     COMP_MODE_PLAIN,
     DELTA_BASE_REUSE_NO,
     DELTA_BASE_REUSE_TRY,
     ENTRY_RANK,
     FEATURES_BY_VERSION,
     FLAG_GENERALDELTA,
     FLAG_INLINE_DATA,
     INDEX_HEADER,
     KIND_CHANGELOG,
     KIND_FILELOG,
     RANK_UNKNOWN,
     REVLOGV0,
     REVLOGV1,
     REVLOGV1_FLAGS,
     REVLOGV2,
     REVLOGV2_FLAGS,
     REVLOG_DEFAULT_FLAGS,
     REVLOG_DEFAULT_FORMAT,
     REVLOG_DEFAULT_VERSION,
     SUPPORTED_FLAGS,
 )
 from .revlogutils.flagutil import (
     REVIDX_DEFAULT_FLAGS,
     REVIDX_ELLIPSIS,
     REVIDX_EXTSTORED,
     REVIDX_FLAGS_ORDER,
     REVIDX_HASCOPIESINFO,
     REVIDX_ISCENSORED,
     REVIDX_RAWTEXT_CHANGING_FLAGS,
 )
 from .thirdparty import attr
 from . import (
     ancestor,
     dagop,
     error,
     mdiff,
     policy,
     pycompat,
     revlogutils,
     templatefilters,
     util,
 )
 from .interfaces import (
     repository,
     util as interfaceutil,
 )
 from .revlogutils import (
     deltas as deltautil,
     docket as docketutil,
     flagutil,
     nodemap as nodemaputil,
     randomaccessfile,
     revlogv0,
     rewrite,
     sidedata as sidedatautil,
 )
 from .utils import (
     storageutil,
     stringutil,
 )
 
 # blanked usage of all the names to prevent pyflakes complaints
 # We need these names available in the module for extensions.
 
 REVLOGV0
 REVLOGV1
 REVLOGV2
 CHANGELOGV2
 FLAG_INLINE_DATA
 FLAG_GENERALDELTA
 REVLOG_DEFAULT_FLAGS
 REVLOG_DEFAULT_FORMAT
 REVLOG_DEFAULT_VERSION
 REVLOGV1_FLAGS
 REVLOGV2_FLAGS
 REVIDX_ISCENSORED
 REVIDX_ELLIPSIS
 REVIDX_HASCOPIESINFO
 REVIDX_EXTSTORED
 REVIDX_DEFAULT_FLAGS
 REVIDX_FLAGS_ORDER
 REVIDX_RAWTEXT_CHANGING_FLAGS
 
 parsers = policy.importmod('parsers')
 rustancestor = policy.importrust('ancestor')
 rustdagop = policy.importrust('dagop')
 rustrevlog = policy.importrust('revlog')
 
 # Aliased for performance.
 _zlibdecompress = zlib.decompress
 
 # max size of inline data embedded into a revlog
 _maxinline = 131072
 
 # Flag processors for REVIDX_ELLIPSIS.
 def ellipsisreadprocessor(rl, text):
     return text, False
 
 
 def ellipsiswriteprocessor(rl, text):
     return text, False
 
 
 def ellipsisrawprocessor(rl, text):
     return False
 
 
 ellipsisprocessor = (
     ellipsisreadprocessor,
     ellipsiswriteprocessor,
     ellipsisrawprocessor,
 )
 
 
 def _verify_revision(rl, skipflags, state, node):
     """Verify the integrity of the given revlog ``node`` while providing a hook
     point for extensions to influence the operation."""
     if skipflags:
         state[b'skipread'].add(node)
     else:
         # Side-effect: read content and verify hash.
         rl.revision(node)
 
 
 # True if a fast implementation for persistent-nodemap is available
 #
 # We also consider we have a "fast" implementation in "pure" python because
 # people using pure don't really have performance considerations (and a
 # wheelbarrow of other slowness sources)
 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or hasattr(
     parsers, 'BaseIndexObject'
 )
 
 
 @interfaceutil.implementer(repository.irevisiondelta)
 @attr.s(slots=True)
 class revlogrevisiondelta:
     node = attr.ib()
     p1node = attr.ib()
     p2node = attr.ib()
     basenode = attr.ib()
     flags = attr.ib()
     baserevisionsize = attr.ib()
     revision = attr.ib()
     delta = attr.ib()
     sidedata = attr.ib()
     protocol_flags = attr.ib()
     linknode = attr.ib(default=None)
 
 
 @interfaceutil.implementer(repository.iverifyproblem)
 @attr.s(frozen=True)
 class revlogproblem:
     warning = attr.ib(default=None)
     error = attr.ib(default=None)
     node = attr.ib(default=None)
 
 
 def parse_index_v1(data, inline):
     # call the C implementation to parse the index data
     index, cache = parsers.parse_index2(data, inline)
     return index, cache
 
 
 def parse_index_v2(data, inline):
     # call the C implementation to parse the index data
     index, cache = parsers.parse_index2(data, inline, format=REVLOGV2)
     return index, cache
 
 
 def parse_index_cl_v2(data, inline):
     # call the C implementation to parse the index data
     index, cache = parsers.parse_index2(data, inline, format=CHANGELOGV2)
     return index, cache
 
 
 if hasattr(parsers, 'parse_index_devel_nodemap'):
 
     def parse_index_v1_nodemap(data, inline):
         index, cache = parsers.parse_index_devel_nodemap(data, inline)
         return index, cache
 
 
 else:
     parse_index_v1_nodemap = None
 
 
 def parse_index_v1_mixed(data, inline):
     index, cache = parse_index_v1(data, inline)
     return rustrevlog.MixedIndex(index), cache
 
 
 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
 # signed integer)
 _maxentrysize = 0x7FFFFFFF
 
 FILE_TOO_SHORT_MSG = _(
     b'cannot read from revlog %s;'
     b' expected %d bytes from offset %d, data size is %d'
 )
 
 hexdigits = b'0123456789abcdefABCDEF'
 
 
+class _Config:
+    def copy(self):
+        return self.__class__(**self.__dict__)
+
+
 @attr.s()
-class FeatureConfig:
+class FeatureConfig(_Config):
246 """Hold configuration values about the available revlog features"""
251 """Hold configuration values about the available revlog features"""
247
252
248 # the default compression engine
253 # the default compression engine
249 compression_engine = attr.ib(default=b'zlib')
254 compression_engine = attr.ib(default=b'zlib')
250 # compression engines options
255 # compression engines options
251 compression_engine_options = attr.ib(default=attr.Factory(dict))
256 compression_engine_options = attr.ib(default=attr.Factory(dict))
252
257
253 # can we use censor on this revlog
258 # can we use censor on this revlog
254 censorable = attr.ib(default=False)
259 censorable = attr.ib(default=False)
255 # does this revlog use the "side data" feature
260 # does this revlog use the "side data" feature
256 has_side_data = attr.ib(default=False)
261 has_side_data = attr.ib(default=False)
257 # might remove rank configuration once the computation has no impact
262 # might remove rank configuration once the computation has no impact
258 compute_rank = attr.ib(default=False)
263 compute_rank = attr.ib(default=False)
259 # parent order is supposed to be semantically irrelevant, so we
264 # parent order is supposed to be semantically irrelevant, so we
260 # normally resort parents to ensure that the first parent is non-null,
265 # normally resort parents to ensure that the first parent is non-null,
261 # if there is a non-null parent at all.
266 # if there is a non-null parent at all.
262 # filelog abuses the parent order as flag to mark some instances of
267 # filelog abuses the parent order as flag to mark some instances of
263 # meta-encoded files, so allow it to disable this behavior.
268 # meta-encoded files, so allow it to disable this behavior.
264 canonical_parent_order = attr.ib(default=False)
269 canonical_parent_order = attr.ib(default=False)
265 # can ellipsis commit be used
270 # can ellipsis commit be used
266 enable_ellipsis = attr.ib(default=False)
271 enable_ellipsis = attr.ib(default=False)
267
272
+    def copy(self):
+        new = super().copy()
+        new.compression_engine_options = self.compression_engine_options.copy()
+        return new
+
 
 @attr.s()
-class DataConfig:
+class DataConfig(_Config):
271 """Hold configuration value about how the revlog data are read"""
281 """Hold configuration value about how the revlog data are read"""
272
282
273 # should we try to open the "pending" version of the revlog
283 # should we try to open the "pending" version of the revlog
274 try_pending = attr.ib(default=False)
284 try_pending = attr.ib(default=False)
275 # should we try to open the "splitted" version of the revlog
285 # should we try to open the "splitted" version of the revlog
276 try_split = attr.ib(default=False)
286 try_split = attr.ib(default=False)
277 # When True, indexfile should be opened with checkambig=True at writing,
287 # When True, indexfile should be opened with checkambig=True at writing,
278 # to avoid file stat ambiguity.
288 # to avoid file stat ambiguity.
279 check_ambig = attr.ib(default=False)
289 check_ambig = attr.ib(default=False)
280
290
281 # If true, use mmap instead of reading to deal with large index
291 # If true, use mmap instead of reading to deal with large index
282 mmap_large_index = attr.ib(default=False)
292 mmap_large_index = attr.ib(default=False)
283 # how much data is large
293 # how much data is large
284 mmap_index_threshold = attr.ib(default=None)
294 mmap_index_threshold = attr.ib(default=None)
285 # How much data to read and cache into the raw revlog data cache.
295 # How much data to read and cache into the raw revlog data cache.
286 chunk_cache_size = attr.ib(default=65536)
296 chunk_cache_size = attr.ib(default=65536)
287
297
288 # Allow sparse reading of the revlog data
298 # Allow sparse reading of the revlog data
289 with_sparse_read = attr.ib(default=False)
299 with_sparse_read = attr.ib(default=False)
290 # minimal density of a sparse read chunk
300 # minimal density of a sparse read chunk
291 sr_density_threshold = attr.ib(default=0.50)
301 sr_density_threshold = attr.ib(default=0.50)
292 # minimal size of data we skip when performing sparse read
302 # minimal size of data we skip when performing sparse read
293 sr_min_gap_size = attr.ib(default=262144)
303 sr_min_gap_size = attr.ib(default=262144)
294
304
295 # are delta encoded against arbitrary bases.
305 # are delta encoded against arbitrary bases.
296 generaldelta = attr.ib(default=False)
306 generaldelta = attr.ib(default=False)
297
307
 
 @attr.s()
-class DeltaConfig:
+class DeltaConfig(_Config):
301 """Hold configuration value about how new delta are computed
311 """Hold configuration value about how new delta are computed
302
312
303 Some attributes are duplicated from DataConfig to help havign each object
313 Some attributes are duplicated from DataConfig to help havign each object
304 self contained.
314 self contained.
305 """
315 """
306
316
307 # can delta be encoded against arbitrary bases.
317 # can delta be encoded against arbitrary bases.
308 general_delta = attr.ib(default=False)
318 general_delta = attr.ib(default=False)
309 # Allow sparse writing of the revlog data
319 # Allow sparse writing of the revlog data
310 sparse_revlog = attr.ib(default=False)
320 sparse_revlog = attr.ib(default=False)
311 # maximum length of a delta chain
321 # maximum length of a delta chain
312 max_chain_len = attr.ib(default=None)
322 max_chain_len = attr.ib(default=None)
313 # Maximum distance between delta chain base start and end
323 # Maximum distance between delta chain base start and end
314 max_deltachain_span = attr.ib(default=-1)
324 max_deltachain_span = attr.ib(default=-1)
315 # If `upper_bound_comp` is not None, this is the expected maximal gain from
325 # If `upper_bound_comp` is not None, this is the expected maximal gain from
316 # compression for the data content.
326 # compression for the data content.
317 upper_bound_comp = attr.ib(default=None)
327 upper_bound_comp = attr.ib(default=None)
318 # Should we try a delta against both parent
328 # Should we try a delta against both parent
319 delta_both_parents = attr.ib(default=True)
329 delta_both_parents = attr.ib(default=True)
320 # Test delta base candidate group by chunk of this maximal size.
330 # Test delta base candidate group by chunk of this maximal size.
321 candidate_group_chunk_size = attr.ib(default=0)
331 candidate_group_chunk_size = attr.ib(default=0)
322 # Should we display debug information about delta computation
332 # Should we display debug information about delta computation
323 debug_delta = attr.ib(default=False)
333 debug_delta = attr.ib(default=False)
324 # trust incoming delta by default
334 # trust incoming delta by default
325 lazy_delta = attr.ib(default=True)
335 lazy_delta = attr.ib(default=True)
326 # trust the base of incoming delta by default
336 # trust the base of incoming delta by default
327 lazy_delta_base = attr.ib(default=False)
337 lazy_delta_base = attr.ib(default=False)
328
338
329
339
 class revlog:
     """
     the underlying revision storage object
 
     A revlog consists of two parts, an index and the revision data.
 
     The index is a file with a fixed record size containing
     information on each revision, including its nodeid (hash), the
     nodeids of its parents, the position and offset of its data within
     the data file, and the revision it's based on. Finally, each entry
     contains a linkrev entry that can serve as a pointer to external
     data.
 
     The revision data itself is a linear collection of data chunks.
     Each chunk represents a revision and is usually represented as a
     delta against the previous chunk. To bound lookup time, runs of
     deltas are limited to about 2 times the length of the original
     version data. This makes retrieval of a version proportional to
     its size, or O(1) relative to the number of revisions.
 
     Both pieces of the revlog are written to in an append-only
     fashion, which means we never need to rewrite a file to insert or
     remove data, and can use some simple techniques to avoid the need
     for locking while reading.
 
     If checkambig, indexfile is opened with checkambig=True at
     writing, to avoid file stat ambiguity.
 
     If mmaplargeindex is True, and an mmapindexthreshold is set, the
     index will be mmapped rather than read if it is larger than the
     configured threshold.
 
     If censorable is True, the revlog can have censored revisions.
 
     If `upperboundcomp` is not None, this is the expected maximal gain from
     compression for the data content.
 
     `concurrencychecker` is an optional function that receives 3 arguments: a
     file handle, a filename, and an expected position. It should check whether
     the current position in the file handle is valid, and log/warn/fail (by
     raising).
 
     See mercurial/revlogutils/constants.py for details about the content of an
     index entry.
     """
 
     _flagserrorclass = error.RevlogError
 
     @staticmethod
     def is_inline_index(header_bytes):
         """Determine if a revlog is inline from the initial bytes of the index"""
         header = INDEX_HEADER.unpack(header_bytes)[0]
 
         _format_flags = header & ~0xFFFF
         _format_version = header & 0xFFFF
 
         features = FEATURES_BY_VERSION[_format_version]
         return features[b'inline'](_format_flags)
 
     def __init__(
         self,
         opener,
         target,
         radix,
         postfix=None,  # only exists for `tmpcensored` now
         checkambig=False,
         mmaplargeindex=False,
         censorable=False,
         upperboundcomp=None,
         persistentnodemap=False,
         concurrencychecker=None,
         trypending=False,
         try_split=False,
         canonical_parent_order=True,
     ):
         """
         create a revlog object
 
         opener is a function that abstracts the file opening operation
         and can be used to implement COW semantics or the like.
 
         `target`: a (KIND, ID) tuple that identifies the content stored in
         this revlog. It helps the rest of the code to understand what the
         revlog is about without having to resort to heuristics and index
         filename analysis. Note that this must reliably be set by normal
         code, but test, debug, or performance measurement code might not
         set it to an accurate value.
         """
         self.upperboundcomp = upperboundcomp
 
         self.radix = radix
 
         self._docket_file = None
         self._indexfile = None
         self._datafile = None
         self._sidedatafile = None
         self._nodemap_file = None
         self.postfix = postfix
         self._trypending = trypending
         self._try_split = try_split
         self.opener = opener
         if persistentnodemap:
             self._nodemap_file = nodemaputil.get_nodemap_file(self)
 
         assert target[0] in ALL_KINDS
         assert len(target) == 2
         self.target = target
         self.feature_config = FeatureConfig(
             censorable=censorable,
             canonical_parent_order=canonical_parent_order,
         )
         self.data_config = DataConfig(
             check_ambig=checkambig,
             mmap_large_index=mmaplargeindex,
         )
         self.delta_config = DeltaConfig()
 
         # 3-tuple of (node, rev, text) for a raw revision.
         self._revisioncache = None
         # Maps rev to chain base rev.
         self._chainbasecache = util.lrucachedict(100)
         # 2-tuple of (offset, data) of raw data from the revlog at an offset.
         self._chunkcache = (0, b'')
 
         self.index = None
         self._docket = None
         self._nodemap_docket = None
         # Mapping of partial identifiers to full nodes.
         self._pcache = {}
 
         # other optional features
 
         # Make copy of flag processors so each revlog instance can support
         # custom flags.
         self._flagprocessors = dict(flagutil.flagprocessors)
 
         # 3-tuple of file handles being used for active writing.
         self._writinghandles = None
         # prevent nesting of addgroup
         self._adding_group = None
 
         self._loadindex()
 
         self._concurrencychecker = concurrencychecker
 
     @property
     def _generaldelta(self):
         """temporary compatibility proxy"""
         return self.delta_config.general_delta
 
     @property
     def _checkambig(self):
         """temporary compatibility proxy"""
         return self.data_config.check_ambig
 
     @property
     def _mmaplargeindex(self):
         """temporary compatibility proxy"""
         return self.data_config.mmap_large_index
 
     @property
     def _censorable(self):
         """temporary compatibility proxy"""
         return self.feature_config.censorable
 
     @property
     def _chunkcachesize(self):
         """temporary compatibility proxy"""
         return self.data_config.chunk_cache_size
 
     @property
     def _maxchainlen(self):
         """temporary compatibility proxy"""
         return self.delta_config.max_chain_len
 
     @property
     def _deltabothparents(self):
         """temporary compatibility proxy"""
         return self.delta_config.delta_both_parents
 
     @property
     def _candidate_group_chunk_size(self):
         """temporary compatibility proxy"""
         return self.delta_config.candidate_group_chunk_size
 
     @property
     def _debug_delta(self):
         """temporary compatibility proxy"""
         return self.delta_config.debug_delta
 
     @property
     def _compengine(self):
         """temporary compatibility proxy"""
         return self.feature_config.compression_engine
 
     @property
     def _compengineopts(self):
         """temporary compatibility proxy"""
         return self.feature_config.compression_engine_options
 
     @property
     def _maxdeltachainspan(self):
         """temporary compatibility proxy"""
         return self.delta_config.max_deltachain_span
 
     @property
     def _withsparseread(self):
         """temporary compatibility proxy"""
         return self.data_config.with_sparse_read
 
     @property
     def _sparserevlog(self):
         """temporary compatibility proxy"""
         return self.delta_config.sparse_revlog
 
     @property
     def hassidedata(self):
         """temporary compatibility proxy"""
         return self.feature_config.has_side_data
 
     @property
     def _srdensitythreshold(self):
         """temporary compatibility proxy"""
         return self.data_config.sr_density_threshold
 
     @property
     def _srmingapsize(self):
         """temporary compatibility proxy"""
         return self.data_config.sr_min_gap_size
 
     @property
     def _compute_rank(self):
         """temporary compatibility proxy"""
         return self.feature_config.compute_rank
 
     @property
     def canonical_parent_order(self):
         """temporary compatibility proxy"""
         return self.feature_config.canonical_parent_order
 
     @property
     def _lazydelta(self):
         """temporary compatibility proxy"""
         return self.delta_config.lazy_delta
 
     @property
     def _lazydeltabase(self):
         """temporary compatibility proxy"""
         return self.delta_config.lazy_delta_base
 
     def _init_opts(self):
         """process options (from above/config) to set up the associated
         default revlog mode
 
         These values might be affected when actually reading on-disk
         information.
 
         The relevant values are returned for use in _loadindex().
 
         * newversionflags:
             version header to use if we need to create a new revlog
 
         * mmapindexthreshold:
             minimal index size at which to start using mmap
 
         * force_nodemap:
             force the usage of a "development" version of the nodemap code
         """
         mmapindexthreshold = None
         opts = self.opener.options
 
         if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
             new_header = CHANGELOGV2
             compute_rank = opts.get(b'changelogv2.compute-rank', True)
             self.feature_config.compute_rank = compute_rank
         elif b'revlogv2' in opts:
             new_header = REVLOGV2
         elif b'revlogv1' in opts:
             new_header = REVLOGV1 | FLAG_INLINE_DATA
             if b'generaldelta' in opts:
                 new_header |= FLAG_GENERALDELTA
         elif b'revlogv0' in self.opener.options:
             new_header = REVLOGV0
         else:
             new_header = REVLOG_DEFAULT_VERSION
 
         if b'chunkcachesize' in opts:
             self.data_config.chunk_cache_size = opts[b'chunkcachesize']
         if b'maxchainlen' in opts:
             self.delta_config.max_chain_len = opts[b'maxchainlen']
         if b'deltabothparents' in opts:
             self.delta_config.delta_both_parents = opts[b'deltabothparents']
         dps_cgds = opts.get(b'delta-parent-search.candidate-group-chunk-size')
         if dps_cgds:
             self.delta_config.candidate_group_chunk_size = dps_cgds
         if b'lazydelta' in opts:
             self.delta_config.lazy_delta = bool(opts[b'lazydelta'])
         if self._lazydelta and b'lazydeltabase' in opts:
             self.delta_config.lazy_delta_base = opts[b'lazydeltabase']
         if b'debug-delta' in opts:
             self.delta_config.debug_delta = opts[b'debug-delta']
         if b'compengine' in opts:
             self.feature_config.compression_engine = opts[b'compengine']
         comp_engine_opts = self.feature_config.compression_engine_options
         if b'zlib.level' in opts:
             comp_engine_opts[b'zlib.level'] = opts[b'zlib.level']
         if b'zstd.level' in opts:
             comp_engine_opts[b'zstd.level'] = opts[b'zstd.level']
         if b'maxdeltachainspan' in opts:
             self.delta_config.max_deltachain_span = opts[b'maxdeltachainspan']
         if self._mmaplargeindex and b'mmapindexthreshold' in opts:
             mmapindexthreshold = opts[b'mmapindexthreshold']
             self.data_config.mmap_index_threshold = mmapindexthreshold
         if b'sparse-revlog' in opts:
             self.delta_config.sparse_revlog = bool(opts[b'sparse-revlog'])
             if self.delta_config.sparse_revlog:
                 # sparse-revlog forces sparse-read
                 self.data_config.with_sparse_read = True
         elif b'with-sparse-read' in opts:
             self.data_config.with_sparse_read = bool(opts[b'with-sparse-read'])
         if b'sparse-read-density-threshold' in opts:
             self.data_config.sr_density_threshold = opts[
                 b'sparse-read-density-threshold'
             ]
         if b'sparse-read-min-gap-size' in opts:
             self.data_config.sr_min_gap_size = opts[b'sparse-read-min-gap-size']
         if opts.get(b'enableellipsis'):
             self.feature_config.enable_ellipsis = True
             self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
 
         # revlog v0 doesn't have flag processors
         for flag, processor in opts.get(b'flagprocessors', {}).items():
             flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
 
         chunk_cache_size = self.data_config.chunk_cache_size
         if chunk_cache_size <= 0:
             raise error.RevlogError(
                 _(b'revlog chunk cache size %r is not greater than 0')
                 % chunk_cache_size
             )
         elif chunk_cache_size & (chunk_cache_size - 1):
             raise error.RevlogError(
                 _(b'revlog chunk cache size %r is not a power of 2')
                 % chunk_cache_size
             )
         force_nodemap = opts.get(b'devel-force-nodemap', False)
         return new_header, mmapindexthreshold, force_nodemap
 
     def _get_data(self, filepath, mmap_threshold, size=None):
         """return a file content with or without mmap
 
         If the file is missing, return the empty string"""
         try:
             with self.opener(filepath) as fp:
                 if mmap_threshold is not None:
                     file_size = self.opener.fstat(fp).st_size
                     if file_size >= mmap_threshold:
                         if size is not None:
                             # avoid potential mmap crash
                             size = min(file_size, size)
                         # TODO: should .close() to release resources without
                         # relying on Python GC
                         if size is None:
                             return util.buffer(util.mmapread(fp))
                         else:
                             return util.buffer(util.mmapread(fp, size))
                 if size is None:
                     return fp.read()
                 else:
                     return fp.read(size)
         except FileNotFoundError:
             return b''
 
     def get_streams(self, max_linkrev, force_inline=False):
         """return a list of streams that represent this revlog
 
         This is used by stream-clone to do bytes to bytes copies of a repository.
 
         This streams data for all revisions that refer to a changelog revision up
         to `max_linkrev`.
 
         If `force_inline` is set, it enforces that the stream will represent an inline revlog.
 
         It returns a list of three-tuples:
 
             [
                 (filename, bytes_stream, stream_size),
                 …
             ]
         """
         n = len(self)
         index = self.index
         while n > 0:
             linkrev = index[n - 1][4]
             if linkrev < max_linkrev:
                 break
             # note: this loop will rarely go through multiple iterations, since
             # it only traverses commits created during the current streaming
             # pull operation.
             #
             # If this becomes a problem, using a binary search should cap the
             # runtime of this.
             n = n - 1
         if n == 0:
             # no data to send
             return []
         index_size = n * index.entry_size
         data_size = self.end(n - 1)
 
         # XXX we might have been split (or stripped) since the object's
         # initialization. We need to close this race too, perhaps by keeping
         # a way to pre-open the files we feed to the revlog and never closing
         # them before we are done streaming.
 
         if self._inline:
 
             def get_stream():
                 with self._indexfp() as fp:
                     yield None
                     size = index_size + data_size
                     if size <= 65536:
                         yield fp.read(size)
                     else:
                         yield from util.filechunkiter(fp, limit=size)
 
             inline_stream = get_stream()
             next(inline_stream)
             return [
                 (self._indexfile, inline_stream, index_size + data_size),
             ]
         elif force_inline:
 
             def get_stream():
                 with self.reading():
                     yield None
 
                     for rev in range(n):
                         idx = self.index.entry_binary(rev)
                         if rev == 0 and self._docket is None:
                             # re-inject the inline flag
                             header = self._format_flags
                             header |= self._format_version
                             header |= FLAG_INLINE_DATA
                             header = self.index.pack_header(header)
                             idx = header + idx
                         yield idx
                         yield self._getsegmentforrevs(rev, rev)[1]
 
             inline_stream = get_stream()
             next(inline_stream)
             return [
                 (self._indexfile, inline_stream, index_size + data_size),
             ]
         else:
 
             def get_index_stream():
                 with self._indexfp() as fp:
                     yield None
                     if index_size <= 65536:
                         yield fp.read(index_size)
                     else:
                         yield from util.filechunkiter(fp, limit=index_size)
 
             def get_data_stream():
                 with self._datafp() as fp:
                     yield None
                     if data_size <= 65536:
                         yield fp.read(data_size)
                     else:
                         yield from util.filechunkiter(fp, limit=data_size)
 
             index_stream = get_index_stream()
             next(index_stream)
             data_stream = get_data_stream()
             next(data_stream)
             return [
                 (self._datafile, data_stream, data_size),
                 (self._indexfile, index_stream, index_size),
             ]
 
808 def _loadindex(self, docket=None):
818 def _loadindex(self, docket=None):
809
819
810 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
820 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
811
821
812 if self.postfix is not None:
822 if self.postfix is not None:
813 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
823 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
814 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
824 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
815 entry_point = b'%s.i.a' % self.radix
825 entry_point = b'%s.i.a' % self.radix
816 elif self._try_split and self.opener.exists(self._split_index_file):
826 elif self._try_split and self.opener.exists(self._split_index_file):
817 entry_point = self._split_index_file
827 entry_point = self._split_index_file
818 else:
828 else:
819 entry_point = b'%s.i' % self.radix
829 entry_point = b'%s.i' % self.radix
820
830
821 if docket is not None:
831 if docket is not None:
822 self._docket = docket
832 self._docket = docket
823 self._docket_file = entry_point
833 self._docket_file = entry_point
824 else:
834 else:
825 self._initempty = True
835 self._initempty = True
826 entry_data = self._get_data(entry_point, mmapindexthreshold)
836 entry_data = self._get_data(entry_point, mmapindexthreshold)
827 if len(entry_data) > 0:
837 if len(entry_data) > 0:
828 header = INDEX_HEADER.unpack(entry_data[:4])[0]
838 header = INDEX_HEADER.unpack(entry_data[:4])[0]
829 self._initempty = False
839 self._initempty = False
830 else:
840 else:
831 header = new_header
841 header = new_header
832
842
833 self._format_flags = header & ~0xFFFF
843 self._format_flags = header & ~0xFFFF
834 self._format_version = header & 0xFFFF
844 self._format_version = header & 0xFFFF
835
845
836 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
846 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
837 if supported_flags is None:
847 if supported_flags is None:
838 msg = _(b'unknown version (%d) in revlog %s')
848 msg = _(b'unknown version (%d) in revlog %s')
839 msg %= (self._format_version, self.display_id)
849 msg %= (self._format_version, self.display_id)
840 raise error.RevlogError(msg)
850 raise error.RevlogError(msg)
841 elif self._format_flags & ~supported_flags:
851 elif self._format_flags & ~supported_flags:
842 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
852 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
843 display_flag = self._format_flags >> 16
853 display_flag = self._format_flags >> 16
844 msg %= (display_flag, self._format_version, self.display_id)
854 msg %= (display_flag, self._format_version, self.display_id)
845 raise error.RevlogError(msg)
855 raise error.RevlogError(msg)
846
856
847 features = FEATURES_BY_VERSION[self._format_version]
857 features = FEATURES_BY_VERSION[self._format_version]
848 self._inline = features[b'inline'](self._format_flags)
858 self._inline = features[b'inline'](self._format_flags)
849 self.delta_config.general_delta = features[b'generaldelta'](
859 self.delta_config.general_delta = features[b'generaldelta'](
850 self._format_flags
860 self._format_flags
851 )
861 )
852 self.feature_config.has_side_data = features[b'sidedata']
862 self.feature_config.has_side_data = features[b'sidedata']
853
863
854 if not features[b'docket']:
864 if not features[b'docket']:
            self._indexfile = entry_point
            index_data = entry_data
        else:
            self._docket_file = entry_point
            if self._initempty:
                self._docket = docketutil.default_docket(self, header)
            else:
                self._docket = docketutil.parse_docket(
                    self, entry_data, use_pending=self._trypending
                )

        if self._docket is not None:
            self._indexfile = self._docket.index_filepath()
            index_data = b''
            index_size = self._docket.index_end
            if index_size > 0:
                index_data = self._get_data(
                    self._indexfile, mmapindexthreshold, size=index_size
                )
                if len(index_data) < index_size:
                    msg = _(b'too few index data for %s: got %d, expected %d')
                    msg %= (self.display_id, len(index_data), index_size)
                    raise error.RevlogError(msg)

            self._inline = False
            # generaldelta implied by version 2 revlogs.
            self.delta_config.general_delta = True
            # the logic for persistent nodemap will be dealt with within the
            # main docket, so disable it for now.
            self._nodemap_file = None

        if self._docket is not None:
            self._datafile = self._docket.data_filepath()
            self._sidedatafile = self._docket.sidedata_filepath()
        elif self.postfix is None:
            self._datafile = b'%s.d' % self.radix
        else:
            self._datafile = b'%s.d.%s' % (self.radix, self.postfix)

        self.nodeconstants = sha1nodeconstants
        self.nullid = self.nodeconstants.nullid

        # sparse-revlog can't be on without general-delta (issue6056)
        if not self._generaldelta:
            self.delta_config.sparse_revlog = False

        self._storedeltachains = True

        devel_nodemap = (
            self._nodemap_file
            and force_nodemap
            and parse_index_v1_nodemap is not None
        )

        use_rust_index = False
        if rustrevlog is not None:
            if self._nodemap_file is not None:
                use_rust_index = True
            else:
                use_rust_index = self.opener.options.get(b'rust.index')

        self._parse_index = parse_index_v1
        if self._format_version == REVLOGV0:
            self._parse_index = revlogv0.parse_index_v0
        elif self._format_version == REVLOGV2:
            self._parse_index = parse_index_v2
        elif self._format_version == CHANGELOGV2:
            self._parse_index = parse_index_cl_v2
        elif devel_nodemap:
            self._parse_index = parse_index_v1_nodemap
        elif use_rust_index:
            self._parse_index = parse_index_v1_mixed
        try:
            d = self._parse_index(index_data, self._inline)
            index, chunkcache = d
            use_nodemap = (
                not self._inline
                and self._nodemap_file is not None
                and hasattr(index, 'update_nodemap_data')
            )
            if use_nodemap:
                nodemap_data = nodemaputil.persisted_data(self)
                if nodemap_data is not None:
                    docket = nodemap_data[0]
                    if (
                        len(d[0]) > docket.tip_rev
                        and d[0][docket.tip_rev][7] == docket.tip_node
                    ):
                        # no changelog tampering
                        self._nodemap_docket = docket
                        index.update_nodemap_data(*nodemap_data)
        except (ValueError, IndexError):
            raise error.RevlogError(
                _(b"index %s is corrupted") % self.display_id
            )
        self.index = index
        self._segmentfile = randomaccessfile.randomaccessfile(
            self.opener,
            (self._indexfile if self._inline else self._datafile),
            self._chunkcachesize,
            chunkcache,
        )
        self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
            self.opener,
            self._sidedatafile,
            self._chunkcachesize,
        )
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = util.lrucachedict(500)
        # revlog header -> revlog compressor
        self._decompressors = {}

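    # Illustrative note (an assumption, not part of the original source):
    # for a plain version-1 filelog the names computed above typically look
    # like `data/some/file.i` for the index and `data/some/file.d` for the
    # data, while small revlogs keep their data inline in the `.i` file.
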
    def get_revlog(self):
        """simple function to mirror the API of other not-really-revlog classes"""
        return self

    @util.propertycache
    def revlog_kind(self):
        return self.target[0]

    @util.propertycache
    def display_id(self):
        """The public-facing "ID" of the revlog that we use in messages"""
        if self.revlog_kind == KIND_FILELOG:
            # Reference the file without the "data/" prefix, so it is familiar
            # to the user.
            return self.target[1]
        else:
            return self.radix

    def _get_decompressor(self, t):
        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor(self._compengineopts)
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(
                    _(b'unknown compression type %s') % binascii.hexlify(t)
                )
        return compressor

    @util.propertycache
    def _compressor(self):
        engine = util.compengines[self._compengine]
        return engine.revlogcompressor(self._compengineopts)

    @util.propertycache
    def _decompressor(self):
        """the default decompressor"""
        if self._docket is None:
            return None
        t = self._docket.default_compression_header
        c = self._get_decompressor(t)
        return c.decompress

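    # Illustrative note (an assumption, not part of the original source):
    # the header ``t`` passed to _get_decompressor is the leading byte(s)
    # of a stored chunk, e.g. b'x' introduces zlib-compressed data, while
    # b'u' marks a chunk stored uncompressed and is handled without any
    # engine lookup. _decompressor instead trusts the docket's recorded
    # default compression header.
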
    def _indexfp(self):
        """file object for the revlog's index file"""
        return self.opener(self._indexfile, mode=b"r")

    def __index_write_fp(self):
        # You should not use this directly and use `_writing` instead
        try:
            f = self.opener(
                self._indexfile, mode=b"r+", checkambig=self._checkambig
            )
            if self._docket is None:
                f.seek(0, os.SEEK_END)
            else:
                f.seek(self._docket.index_end, os.SEEK_SET)
            return f
        except FileNotFoundError:
            return self.opener(
                self._indexfile, mode=b"w+", checkambig=self._checkambig
            )

    def __index_new_fp(self):
        # You should not use this unless you are upgrading from inline revlog
        return self.opener(
            self._indexfile,
            mode=b"w",
            checkambig=self._checkambig,
            atomictemp=True,
        )

    def _datafp(self, mode=b'r'):
        """file object for the revlog's data file"""
        return self.opener(self._datafile, mode=mode)

    @contextlib.contextmanager
    def _sidedatareadfp(self):
        """file object suitable to read sidedata"""
        if self._writinghandles:
            yield self._writinghandles[2]
        else:
            with self.opener(self._sidedatafile) as fp:
                yield fp

    def tiprev(self):
        return len(self.index) - 1

    def tip(self):
        return self.node(self.tiprev())

    def __contains__(self, rev):
        return 0 <= rev < len(self)

    def __len__(self):
        return len(self.index)

    def __iter__(self):
        return iter(range(len(self)))

    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        return storageutil.iterrevs(len(self), start=start, stop=stop)

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def _candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such a flag processor can
        # alter the rawtext content that the delta will be based on, and
        # two clients could have the same revlog node with different flags
        # (i.e. different rawtext contents), making the delta incompatible.
        if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
            self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
        ):
            return False
        return True

    def update_caches(self, transaction):
        """update on-disk cache

        If a transaction is passed, the update may be delayed to transaction
        commit."""
        if self._nodemap_file is not None:
            if transaction is None:
                nodemaputil.update_persistent_nodemap(self)
            else:
                nodemaputil.setup_persistent_nodemap(transaction, self)

    def clearcaches(self):
        """Clear in-memory caches"""
        self._revisioncache = None
        self._chainbasecache.clear()
        self._segmentfile.clear_cache()
        self._segmentfile_sidedata.clear_cache()
        self._pcache = {}
        self._nodemap_docket = None
        self.index.clearcaches()
        # The python code is the one responsible for validating the docket,
        # so we end up having to refresh it here.
        use_nodemap = (
            not self._inline
            and self._nodemap_file is not None
            and hasattr(self.index, 'update_nodemap_data')
        )
        if use_nodemap:
            nodemap_data = nodemaputil.persisted_data(self)
            if nodemap_data is not None:
                self._nodemap_docket = nodemap_data[0]
                self.index.update_nodemap_data(*nodemap_data)

    def rev(self, node):
        """return the revision number associated with a <nodeid>"""
        try:
            return self.index.rev(node)
        except TypeError:
            raise
        except error.RevlogError:
            # parsers.c radix tree lookup failed
            if (
                node == self.nodeconstants.wdirid
                or node in self.nodeconstants.wdirfilenodeids
            ):
                raise error.WdirUnsupported
            raise error.LookupError(node, self.display_id, _(b'no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def sidedata_cut_off(self, rev):
        sd_cut_off = self.index[rev][8]
        if sd_cut_off != 0:
            return sd_cut_off
        # This is some annoying dance, because entries without sidedata
        # currently use 0 as their offset (instead of previous-offset +
        # previous-size).
        #
        # We should reconsider this sidedata → 0 sidedata_offset policy.
        # In the meantime, we need this.
        while 0 <= rev:
            e = self.index[rev]
            if e[9] != 0:
                return e[8] + e[9]
            rev -= 1
        return 0

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

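    # Illustrative sketch (not part of the original source): the first
    # index tuple entry packs offset and flags into a single integer, so a
    # hypothetical entry stored at byte offset 1024 with the censored flag
    # would satisfy:
    #
    #   offset_flags = (1024 << 16) | REVIDX_ISCENSORED
    #   offset_flags >> 16     == 1024               # what start() returns
    #   offset_flags & 0xFFFF  == REVIDX_ISCENSORED  # what flags() returns
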
    def length(self, rev):
        return self.index[rev][1]

    def sidedata_length(self, rev):
        if not self.hassidedata:
            return 0
        return self.index[rev][9]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.rawdata(rev)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev))

    def fast_rank(self, rev):
        """Return the rank of a revision if already known, or None otherwise.

        The rank of a revision is the size of the sub-graph it defines as a
        head. Equivalently, the rank of a revision `r` is the size of the set
        `ancestors(r)`, `r` included.

        This method returns the rank retrieved from the revlog in constant
        time. It makes no attempt at computing unknown values for versions of
        the revlog which do not persist the rank.
        """
        rank = self.index[rev][ENTRY_RANK]
        if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
            return None
        if rev == nullrev:
            return 0  # convention
        return rank

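    # Illustrative example (not part of the original source): with root 0,
    # two children 1 and 2, and a merge 3 of 1 and 2, the rank of 3 is 4
    # because ancestors(3) = {0, 1, 2, 3}. Only changelog-v2 revlogs
    # persist ranks, so fast_rank() returns None everywhere else.
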
    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

        if self.canonical_parent_order and entry[5] == nullrev:
            return entry[6], entry[5]
        else:
            return entry[5], entry[6]

    # fast parentrevs(rev) where rev isn't filtered
    _uncheckedparentrevs = parentrevs

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        # inline node() to avoid function call overhead
        if self.canonical_parent_order and d[5] == nullrev:
            return i[d[6]][7], i[d[5]][7]
        else:
            return i[d[5]][7], i[d[6]][7]

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

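    # Illustrative example (not part of the original source): for a chain
    # where revision 0 stores a full text and revisions 1 and 2 store
    # deltas against their predecessor, _chaininfo(2) returns
    # (2, len0 + len1 + len2): the chain length plus the summed on-disk
    # chunk sizes (the base's size is counted since it must be read too).
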
    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped

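    # Illustrative usage (not part of the original source), reusing the
    # chain above on a hypothetical revlog ``rl``:
    #
    #   rl._deltachain(2)            -> ([0, 1, 2], False)
    #   rl._deltachain(2, stoprev=1) -> ([2], True)
    #
    # i.e. every revision whose chunk must be applied, oldest first, plus
    # whether the walk stopped early at ``stoprev``.
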
    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse revision order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        # first, make sure start revisions aren't filtered
        revs = list(revs)
        checkrev = self.node
        for r in revs:
            checkrev(r)
        # and we're sure ancestors aren't filtered as well

        if rustancestor is not None and self.index.rust_ext_compat:
            lazyancestors = rustancestor.LazyAncestors
            arg = self.index
        else:
            lazyancestors = ancestor.lazyancestors
            arg = self._uncheckedparentrevs
        return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)

    def descendants(self, revs):
        return dagop.descendantrevs(revs, self.revs, self.parentrevs)

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

        ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset:
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

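    # Illustrative example (not part of the original source): on a linear
    # history 0-1-2-3, findcommonmissing([node(1)], [node(3)]) returns
    # ::node(1) as the lazy "has" set and [node(2), node(3)] as the
    # missing part, matching the revset (::heads) - (::common).
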
    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        if rustancestor is not None and self.index.rust_ext_compat:
            return rustancestor.MissingAncestors(self.index, common)
        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

        1. N is an ancestor of some node in 'heads'
        2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullrev."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

        1. N is an ancestor of some node in 'heads'
        2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

        1. N is a descendant of some node in 'roots'
        2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [self.nullid]  # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return (
                [self.node(r) for r in self],
                [self.nullid],
                list(self.heads()),
            )
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == self.nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n)  # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update(
                            [p for p in self.parents(n) if p != self.nullid]
                        )
                    elif n in heads:  # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [self.nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the dependents list with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in heads.items() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

    def headrevs(self, revs=None):
        if revs is None:
            try:
                return self.index.headrevs()
            except AttributeError:
                return self._headrevs()
        if rustdagop is not None and self.index.rust_ext_compat:
            return rustdagop.headrevs(self.index, revs)
        return dagop.headrevs(revs, self._uncheckedparentrevs)

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]

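    # Illustrative walk-through (not part of the original source): with
    # revisions 0-1-2 plus a branch 3 whose parent is 1, every rev first
    # marks itself as a candidate head, then clears its parents, leaving
    # ishead == [0, 0, 1, 1] and thus headrevs [2, 3]. The extra slot at
    # index ``count`` harmlessly absorbs the nullrev (-1) parent markings.
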
    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [self.nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = {self.rev(n) for n in stop or []}

        revs = dagop.headrevssubset(
            self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
        )

        return [self.node(rev) for rev in revs]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError):  # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        reachableroots is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return bool(self.reachableroots(a, [b], [a], includepath=False))

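    # Illustrative note (not part of the original source): the ``a > b``
    # early return in isancestorrev is sound because revlogs are
    # append-only, so a parent always has a strictly smaller revision
    # number than any of its descendants (the a == b case is handled just
    # above it).
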
    def reachableroots(self, minroot, heads, roots, includepath=False):
        """return (heads(::(<roots> and <roots>::<heads>)))

        If includepath is True, return (<roots>::<heads>)."""
        try:
            return self.index.reachableroots2(
                minroot, heads, roots, includepath
            )
        except AttributeError:
            return dagop._reachablerootspure(
                self.parentrevs, minroot, roots, heads, includepath
            )

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return self.nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == self.nodeconstants.nodelen:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node)  # quick search the index
                return node
            except error.LookupError:
                pass  # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if b"%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 2 * self.nodeconstants.nodelen:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (binascii.Error, error.LookupError):
                pass

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be full
        # hashes
        maybewdir = self.nodeconstants.wdirhex.startswith(id)
        ambiguous = False
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    ambiguous = True
                else:
                    return partial
            elif maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            else:
                return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                ambiguous = True
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key is not hex
            pass
        if ambiguous:
            raise error.AmbiguousPrefixLookupError(
                id, self.display_id, _(b'ambiguous identifier')
            )

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            # hex(node)[:...]
            l = len(id) // 2 * 2  # grab an even number of digits
            try:
                # we're dropping the last digit, so let's check that it's hex,
                # to avoid the expensive computation below if it's not
                if len(id) % 2 > 0:
                    if not (id[-1] in hexdigits):
                        return None
                prefix = bin(id[:l])
            except binascii.Error:
                pass
            else:
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [
                    n for n in nl if hex(n).startswith(id) and self.hasnode(n)
                ]
                if self.nodeconstants.nullhex.startswith(id):
                    nl.append(self.nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.display_id, _(b'ambiguous identifier')
                    )
                if maybewdir:
                    raise error.WdirUnsupported
                return None

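    # Lookup order used above, summarized: the C radix tree first (fast
    # path), then the prefix cache self._pcache, then a linear scan of the
    # index. Only the linear scan honors filtered revisions, which is why an
    # ambiguous radix match on a filtered changelog falls through to it.
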
    def lookup(self, id):
        """locate a node based on:
        - revision number or str(revision number)
        - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.display_id, _(b'no match found'))

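    # Illustrative usage (assumes `rl` is an open revlog; the prefix shown is
    # hypothetical):
    #
    #   rl.lookup(0)          # revision number -> binary node
    #   rl.lookup(b'0')       # str(rev) works too
    #   rl.lookup(b'1e4f2a')  # unambiguous hex prefix -> binary node
    #
    # error.LookupError is raised when nothing matches.
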
    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""

        def isvalid(prefix):
            try:
                matchednode = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if matchednode is None:
                raise error.LookupError(node, self.display_id, _(b'no node'))
            return True

        def maybewdir(prefix):
            return all(c == b'f' for c in pycompat.iterbytestr(prefix))

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != self.nodeconstants.wdirid:
                    raise error.LookupError(
                        node, self.display_id, _(b'no node')
                    )
            except AttributeError:
                # Fall through to pure code
                pass

        if node == self.nodeconstants.wdirid:
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, len(hexnode) + 1):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

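    # Note on the fallbacks above: the C index's shortest() handles the
    # common, unfiltered case; the pure-Python probing through isvalid() only
    # runs for filtered repositories or when the C extension is unavailable
    # (the AttributeError branch).
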
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different than what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

    def _getsegmentforrevs(self, startrev, endrev):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self.index.entry_size
            end += (endrev + 1) * self.index.entry_size
        length = end - start

        return start, self._segmentfile.read_chunk(start, length)

    def _chunk(self, rev):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision.

        Returns a str holding uncompressed data for the requested revision.
        """
        compression_mode = self.index[rev][10]
        data = self._getsegmentforrevs(rev, rev)[1]
        if compression_mode == COMP_MODE_PLAIN:
            return data
        elif compression_mode == COMP_MODE_DEFAULT:
            return self._decompressor(data)
        elif compression_mode == COMP_MODE_INLINE:
            return self.decompress(data)
        else:
            msg = b'unknown compression mode %d'
            msg %= compression_mode
            raise error.RevlogError(msg)

    def _chunks(self, revs, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self.index.entry_size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(
                self, revs, targetsize=targetsize
            )

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev) for rev in revschunk]

            decomp = self.decompress
            # self._decompressor might be None, but will not be used in
            # that case
            def_decomp = self._decompressor
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                comp_mode = self.index[rev][10]
                c = buffer(data, chunkstart - offset, chunklength)
                if comp_mode == COMP_MODE_PLAIN:
                    ladd(c)
                elif comp_mode == COMP_MODE_INLINE:
                    ladd(decomp(c))
                elif comp_mode == COMP_MODE_DEFAULT:
                    ladd(def_decomp(c))
                else:
                    msg = b'unknown compression mode %d'
                    msg %= comp_mode
                    raise error.RevlogError(msg)

        return l

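    # Reading strategy above, in short: without sparse-read the whole span of
    # requested revisions is fetched as one segment; with sparse-read,
    # deltautil.slicechunk() splits the span so reads skip large gaps between
    # the chunks actually needed.
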
    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

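    # Illustrative sketch (assumes `rl` is an open revlog): with general
    # delta the stored base may be any earlier revision; without it the delta
    # is always against rev - 1:
    #
    #   base = rl.deltaparent(rev)  # nullrev means a full snapshot is stored
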
    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        if not self._sparserevlog:
            return self.deltaparent(rev) == nullrev
        elif hasattr(self.index, 'issnapshot'):
            # directly assign the method to cache the testing and access
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        while self.length(p1) == 0:
            b = self.deltaparent(p1)
            if b == p1:
                break
            p1 = b
        p2 = entry[6]
        while self.length(p2) == 0:
            b = self.deltaparent(p2)
            if b == p2:
                break
            p2 = b
        if base == p1 or base == p2:
            return False
        return self.issnapshot(base)

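    # About the two while loops above: a parent whose stored delta has zero
    # length contributes no data, so snapshot detection walks its delta chain
    # down to the first entry that actually stores something.
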
    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))

    def revision(self, nodeorrev):
        """return an uncompressed revision of a given node or revision
        number.
        """
        return self._revisiondata(nodeorrev)

    def sidedata(self, nodeorrev):
        """a map of extra data related to the changeset but not part of the hash

        This function currently returns a dictionary. However, a more advanced
        mapping object will likely be used in the future for more
        efficient/lazy code.
        """
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
        else:
            rev = self.rev(nodeorrev)
        return self._sidedata(rev)

    def _revisiondata(self, nodeorrev, raw=False):
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        # fast path the special `nullid` rev
        if node == self.nullid:
            return b""

        # ``rawtext`` is the text as stored inside the revlog. Might be the
        # revision or might need to be processed to retrieve the revision.
        rev, rawtext, validated = self._rawtext(node, rev)

        if raw and validated:
            # if we don't want to process the raw text and the raw text is
            # cached, we can exit early.
            return rawtext
        if rev is None:
            rev = self.rev(node)
        # the revlog's flags for this revision
        # (they usually alter its state or content)
        flags = self.flags(rev)

        if validated and flags == REVIDX_DEFAULT_FLAGS:
            # no extra flags set, no flag processor runs, text = rawtext
            return rawtext

        if raw:
            validatehash = flagutil.processflagsraw(self, rawtext, flags)
            text = rawtext
        else:
            r = flagutil.processflagsread(self, rawtext, flags)
            text, validatehash = r
        if validatehash:
            self.checkhash(text, node, rev=rev)
        if not validated:
            self._revisioncache = (node, rev, rawtext)

        return text

    def _rawtext(self, node, rev):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)
        """

        # revision in the cache (could be useful to apply delta)
        cachedrev = None
        # An intermediate text to apply deltas to
        basetext = None

        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._revisioncache:
            if self._revisioncache[0] == node:
                return (rev, self._revisioncache[2], True)
            cachedrev = self._revisioncache[1]

        if rev is None:
            rev = self.rev(node)

        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            basetext = self._revisioncache[2]

        # drop cache to save memory, the caller is expected to
        # update self._revisioncache after validating the text
        self._revisioncache = None

        targetsize = None
        rawsize = self.index[rev][2]
        if 0 <= rawsize:
            targetsize = 4 * rawsize

        bins = self._chunks(chain, targetsize=targetsize)
        if basetext is None:
            basetext = bytes(bins[0])
            bins = bins[1:]

        rawtext = mdiff.patches(basetext, bins)
        del basetext  # let us have a chance to free memory early
        return (rev, rawtext, False)

    def _sidedata(self, rev):
        """Return the sidedata for a given revision number."""
        index_entry = self.index[rev]
        sidedata_offset = index_entry[8]
        sidedata_size = index_entry[9]

        if self._inline:
            sidedata_offset += self.index.entry_size * (1 + rev)
        if sidedata_size == 0:
            return {}

        if self._docket.sidedata_end < sidedata_offset + sidedata_size:
            filename = self._sidedatafile
            end = self._docket.sidedata_end
            offset = sidedata_offset
            length = sidedata_size
            m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
            raise error.RevlogError(m)

        comp_segment = self._segmentfile_sidedata.read_chunk(
            sidedata_offset, sidedata_size
        )

        comp = self.index[rev][11]
        if comp == COMP_MODE_PLAIN:
            segment = comp_segment
        elif comp == COMP_MODE_DEFAULT:
            segment = self._decompressor(comp_segment)
        elif comp == COMP_MODE_INLINE:
            segment = self.decompress(comp_segment)
        else:
            msg = b'unknown compression mode %d'
            msg %= comp
            raise error.RevlogError(msg)

        sidedata = sidedatautil.deserialize_sidedata(segment)
        return sidedata

    def rawdata(self, nodeorrev):
        """return the uncompressed raw data of a given node or revision number."""
        return self._revisiondata(nodeorrev, raw=True)

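    # Illustrative contrast (assumes `rl` is an open revlog and `n` a node):
    #
    #   rl.revision(n)  # text after flag processors run (what callers see)
    #   rl.rawdata(n)   # bytes as stored, before any flag processing
    #
    # The two only differ when revision flags (censoring, external storage,
    # ...) are set on the revision.
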
    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if self._revisioncache and self._revisioncache[0] == node:
                    self._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(
                    _(b"integrity check failed on %s:%s")
                    % (self.display_id, pycompat.bytestr(revornode))
                )
        except error.RevlogError:
            if self._censorable and storageutil.iscensoredtext(text):
                raise error.CensoredNodeError(self.display_id, node, text)
            raise

    @property
    def _split_index_file(self):
        """the path at which to expect the index of an ongoing split operation

        The file will only exist if a splitting operation is in progress, but
        it is always expected at the same location."""
        parts = self.radix.split(b'/')
        if len(parts) > 1:
            # adds a '-s' suffix to the ``data/`` or ``meta/`` base
            head = parts[0] + b'-s'
            mids = parts[1:-1]
            tail = parts[-1] + b'.i'
            pieces = [head] + mids + [tail]
            return b'/'.join(pieces)
        else:
            # the revlog is stored at the root of the store (changelog or
            # manifest), no risk of collision.
            return self.radix + b'.i.s'

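    # Worked example of the path computation above: a revlog with radix
    # b'data/some/file' gets its split index at b'data-s/some/file.i', while
    # a store-root revlog with radix b'00changelog' uses b'00changelog.i.s'.
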
    def _enforceinlinesize(self, tr, side_write=True):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        total_size = self.start(tiprev) + self.length(tiprev)
        if not self._inline or total_size < _maxinline:
            return

        troffset = tr.findoffset(self._indexfile)
        if troffset is None:
            raise error.RevlogError(
                _(b"%s not found in the transaction") % self._indexfile
            )
        if troffset:
            tr.addbackup(self._indexfile, for_offset=True)
        tr.add(self._datafile, 0)

        existing_handles = False
        if self._writinghandles is not None:
            existing_handles = True
            fp = self._writinghandles[0]
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None
            self._segmentfile.writing_handle = None
            # No need to deal with the sidedata writing handle as it is only
            # relevant with revlog-v2, which is never inline and so never
            # reaches this code.
        if side_write:
            old_index_file_path = self._indexfile
            new_index_file_path = self._split_index_file
            opener = self.opener
            weak_self = weakref.ref(self)

            # the "split" index replaces the real index when the transaction
            # is finalized
            def finalize_callback(tr):
                opener.rename(
                    new_index_file_path,
                    old_index_file_path,
                    checkambig=True,
                )
                maybe_self = weak_self()
                if maybe_self is not None:
                    maybe_self._indexfile = old_index_file_path

            def abort_callback(tr):
                maybe_self = weak_self()
                if maybe_self is not None:
                    maybe_self._indexfile = old_index_file_path

            tr.registertmp(new_index_file_path)
            if self.target[1] is not None:
                callback_id = b'000-revlog-split-%d-%s' % self.target
            else:
                callback_id = b'000-revlog-split-%d' % self.target[0]
            tr.addfinalize(callback_id, finalize_callback)
            tr.addabort(callback_id, abort_callback)

        new_dfh = self._datafp(b'w+')
        new_dfh.truncate(0)  # drop any potentially existing data
        try:
            with self.reading():
                for r in self:
                    new_dfh.write(self._getsegmentforrevs(r, r)[1])
                new_dfh.flush()

            if side_write:
                self._indexfile = new_index_file_path
                with self.__index_new_fp() as fp:
                    self._format_flags &= ~FLAG_INLINE_DATA
                    self._inline = False
                    for i in self:
                        e = self.index.entry_binary(i)
                        if i == 0 and self._docket is None:
                            header = self._format_flags | self._format_version
                            header = self.index.pack_header(header)
                            e = header + e
                        fp.write(e)
                    if self._docket is not None:
                        self._docket.index_end = fp.tell()

                # If we don't use side-write, the temp file replaces the real
                # index when we exit the context manager

            nodemaputil.setup_persistent_nodemap(tr, self)
            self._segmentfile = randomaccessfile.randomaccessfile(
                self.opener,
                self._datafile,
                self._chunkcachesize,
            )

            if existing_handles:
                # switched from inline to conventional; reopen the index
                ifh = self.__index_write_fp()
                self._writinghandles = (ifh, new_dfh, None)
                self._segmentfile.writing_handle = new_dfh
                new_dfh = None
                # No need to deal with the sidedata writing handle as it is
                # only relevant with revlog-v2, which is never inline and so
                # never reaches this code.
        finally:
            if new_dfh is not None:
                new_dfh.close()

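    # Conversion summary for _enforceinlinesize(): the data chunks are copied
    # out of the inline index into a standalone data file, then a data-free
    # index is written (atomically, via the side-written temporary file, when
    # side_write is requested).
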
    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored."""

    @contextlib.contextmanager
    def reading(self):
        """Context manager that keeps data and sidedata files open for reading"""
        if len(self.index) == 0:
            yield  # nothing to be read
        else:
            with self._segmentfile.reading():
                with self._segmentfile_sidedata.reading():
                    yield

    @contextlib.contextmanager
    def _writing(self, transaction):
        if self._trypending:
            msg = b'try to write in a `trypending` revlog: %s'
            msg %= self.display_id
            raise error.ProgrammingError(msg)
        if self._writinghandles is not None:
            yield
        else:
            ifh = dfh = sdfh = None
            try:
                r = len(self)
                # opening the data file.
                dsize = 0
                if r:
                    dsize = self.end(r - 1)
                dfh = None
                if not self._inline:
                    try:
                        dfh = self._datafp(b"r+")
                        if self._docket is None:
                            dfh.seek(0, os.SEEK_END)
                        else:
                            dfh.seek(self._docket.data_end, os.SEEK_SET)
                    except FileNotFoundError:
                        dfh = self._datafp(b"w+")
                    transaction.add(self._datafile, dsize)
                if self._sidedatafile is not None:
                    # revlog-v2 does not inline, help Pytype
                    assert dfh is not None
                    try:
                        sdfh = self.opener(self._sidedatafile, mode=b"r+")
                        dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
                    except FileNotFoundError:
                        sdfh = self.opener(self._sidedatafile, mode=b"w+")
                        transaction.add(
                            self._sidedatafile, self._docket.sidedata_end
                        )

                # opening the index file.
                isize = r * self.index.entry_size
                ifh = self.__index_write_fp()
                if self._inline:
                    transaction.add(self._indexfile, dsize + isize)
                else:
                    transaction.add(self._indexfile, isize)
                # exposing all file handles for writing.
                self._writinghandles = (ifh, dfh, sdfh)
                self._segmentfile.writing_handle = ifh if self._inline else dfh
                self._segmentfile_sidedata.writing_handle = sdfh
                yield
                if self._docket is not None:
                    self._write_docket(transaction)
            finally:
                self._writinghandles = None
                self._segmentfile.writing_handle = None
                self._segmentfile_sidedata.writing_handle = None
                if dfh is not None:
                    dfh.close()
                if sdfh is not None:
                    sdfh.close()
                # closing the index file last to avoid exposing references to
                # potentially unflushed data content.
                if ifh is not None:
                    ifh.close()

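    # Illustrative sketch (hypothetical names; `tr` is an open transaction):
    # callers do not normally enter _writing() themselves; addrawrevision()
    # wraps it so the handles are opened once per batch of additions:
    #
    #   with rl._writing(tr):
    #       ...  # one or more rl._addrevision(...) calls
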
    def _write_docket(self, transaction):
        """write the current docket on disk

        Exists as a method to help the changelog implement its transaction
        logic.

        We could also imagine using the same transaction logic for all
        revlogs, since dockets are cheap."""
        self._docket.write(transaction)

    def addrevision(
        self,
        text,
        transaction,
        link,
        p1,
        p2,
        cachedelta=None,
        node=None,
        flags=REVIDX_DEFAULT_FLAGS,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses might
            use a different hashing method (and override checkhash() in that
            case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
            multiple calls
        sidedata - an optional map of extra data related to the revision,
            stored outside the hashed revision content
        """
        if link == nullrev:
            raise error.RevlogError(
                _(b"attempted to add linkrev -1 to %s") % self.display_id
            )

        if sidedata is None:
            sidedata = {}
        elif sidedata and not self.hassidedata:
            raise error.ProgrammingError(
                _(b"trying to add sidedata to a revlog that does not support it")
            )

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = flagutil.processflagswrite(self, text, flags)

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _(
                    b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
                )
                % (self.display_id, len(rawtext))
            )

        node = node or self.hash(rawtext, p1, p2)
        rev = self.index.get_rev(node)
        if rev is not None:
            return rev

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(
            rawtext,
            transaction,
            link,
            p1,
            p2,
            node,
            flags,
            cachedelta=cachedelta,
            deltacomputer=deltacomputer,
            sidedata=sidedata,
        )

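    # Illustrative call (hypothetical names; `tr` is an open transaction):
    #
    #   rev = rl.addrevision(text, tr, linkrev, p1node, p2node)
    #
    # When the computed node is already stored, the existing revision number
    # is returned unchanged (see the early `get_rev` return above).
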
    def addrawrevision(
        self,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        node,
        flags,
        cachedelta=None,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (ex: received
        over wire, or read from an external bundle).
        """
        with self._writing(transaction):
            return self._addrevision(
                node,
                rawtext,
                transaction,
                link,
                p1,
                p2,
                flags,
                cachedelta,
                deltacomputer=deltacomputer,
                sidedata=sidedata,
            )

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return b'', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return b'', compressed

        if data[0:1] == b'\0':
            return b'', data
        return b'u', data

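    # Header convention shared by compress()/decompress(): an empty prefix
    # means the compressor embedded its own header (e.g. b'x' for zlib),
    # b'u' marks data stored uncompressed, and a leading b'\0' is passed
    # through as-is.
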
    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for the "decompress compressed
        # data when encoded with common and officially supported compression
        # engines" case over "raw data" and "data encoded by less common or
        # non-official compression engines." That is why we have the inline
        # lookup first followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == b'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(
                    _(b'revlog decompress error: %s')
                    % stringutil.forcebytestr(e)
                )
        # '\0' is more common than 'u' so it goes first.
        elif t == b'\0':
            return data
        elif t == b'u':
            return util.buffer(data, 1)

        compressor = self._get_decompressor(t)

        return compressor.decompress(data)

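    # The matching toy decoder, dispatching on the first byte exactly as
    # decompress() does, and round-tripping through toy_compress() above:
    import zlib

    def toy_decompress(data):
        if not data:
            return data
        t = data[0:1]
        if t == b'x':
            return zlib.decompress(data)
        elif t == b'\0':
            return data  # stored raw; the NUL byte is part of the payload
        elif t == b'u':
            return data[1:]  # strip the literal marker
        raise ValueError('unknown chunk header: %r' % t)

    header, payload = toy_compress(b'some revlog chunk ' * 64)
    assert toy_decompress(header + payload) == b'some revlog chunk ' * 64
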
    def _addrevision(
        self,
        node,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        flags,
        cachedelta,
        alwayscache=False,
        deltacomputer=None,
        sidedata=None,
    ):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be
          set. if both are set, they must correspond to each other.
        """
        if node == self.nullid:
            raise error.RevlogError(
                _(b"%s: attempt to add null revision") % self.display_id
            )
        if (
            node == self.nodeconstants.wdirid
            or node in self.nodeconstants.wdirfilenodeids
        ):
            raise error.RevlogError(
                _(b"%s: attempt to add wdir revision") % self.display_id
            )
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1

        offset = self._get_data_offset(prev)

        if self._concurrencychecker:
            ifh, dfh, sdfh = self._writinghandles
            # XXX no checking for the sidedata file
            if self._inline:
                # offset is "as if" it were in the .d file, so we need to add
                # on the size of the entry metadata.
                self._concurrencychecker(
                    ifh, self._indexfile, offset + curr * self.index.entry_size
                )
            else:
                # Entries in the .i are a consistent size.
                self._concurrencychecker(
                    ifh, self._indexfile, curr * self.index.entry_size
                )
                self._concurrencychecker(dfh, self._datafile, offset)

        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need the rawtext size, before it is changed by flag processors,
            # which is the non-raw size. use revlog explicitly to avoid
            # filelog's extra logic that might remove metadata size.
            textlen = mdiff.patchedsize(
                revlog.size(self, cachedelta[0]), cachedelta[1]
            )
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            write_debug = None
            if self._debug_delta:
                write_debug = transaction._report
            deltacomputer = deltautil.deltacomputer(
                self, write_debug=write_debug
            )

        if cachedelta is not None and len(cachedelta) == 2:
            # If the cached delta has no information about how it should be
            # reused, add the default reuse instruction according to the
            # revlog's configuration.
            if self._generaldelta and self._lazydeltabase:
                delta_base_reuse = DELTA_BASE_REUSE_TRY
            else:
                delta_base_reuse = DELTA_BASE_REUSE_NO
            cachedelta = (cachedelta[0], cachedelta[1], delta_base_reuse)

        revinfo = revlogutils.revisioninfo(
            node,
            p1,
            p2,
            btext,
            textlen,
            cachedelta,
            flags,
        )

        deltainfo = deltacomputer.finddeltainfo(revinfo)

        compression_mode = COMP_MODE_INLINE
        if self._docket is not None:
            default_comp = self._docket.default_compression_header
            r = deltautil.delta_compression(default_comp, deltainfo)
            compression_mode, deltainfo = r

        sidedata_compression_mode = COMP_MODE_INLINE
        if sidedata and self.hassidedata:
            sidedata_compression_mode = COMP_MODE_PLAIN
            serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
            sidedata_offset = self._docket.sidedata_end
            h, comp_sidedata = self.compress(serialized_sidedata)
            if (
                h != b'u'
                and comp_sidedata[0:1] != b'\0'
                and len(comp_sidedata) < len(serialized_sidedata)
            ):
                assert not h
                if (
                    comp_sidedata[0:1]
                    == self._docket.default_compression_header
                ):
                    sidedata_compression_mode = COMP_MODE_DEFAULT
                    serialized_sidedata = comp_sidedata
                else:
                    sidedata_compression_mode = COMP_MODE_INLINE
                    serialized_sidedata = comp_sidedata
        else:
            serialized_sidedata = b""
            # Don't store the offset if the sidedata is empty, that way
            # we can easily detect empty sidedata and it will be no different
            # from ones we manually add.
            sidedata_offset = 0

        rank = RANK_UNKNOWN
        if self._compute_rank:
            if (p1r, p2r) == (nullrev, nullrev):
                rank = 1
            elif p1r != nullrev and p2r == nullrev:
                rank = 1 + self.fast_rank(p1r)
            elif p1r == nullrev and p2r != nullrev:
                rank = 1 + self.fast_rank(p2r)
            else:  # merge node
                if rustdagop is not None and self.index.rust_ext_compat:
                    rank = rustdagop.rank(self.index, p1r, p2r)
                else:
                    pmin, pmax = sorted((p1r, p2r))
                    rank = 1 + self.fast_rank(pmax)
                    rank += sum(
                        1 for _ in self.findmissingrevs([pmax], [pmin])
                    )

        e = revlogutils.entry(
            flags=flags,
            data_offset=offset,
            data_compressed_length=deltainfo.deltalen,
            data_uncompressed_length=textlen,
            data_compression_mode=compression_mode,
            data_delta_base=deltainfo.base,
            link_rev=link,
            parent_rev_1=p1r,
            parent_rev_2=p2r,
            node_id=node,
            sidedata_offset=sidedata_offset,
            sidedata_compressed_length=len(serialized_sidedata),
            sidedata_compression_mode=sidedata_compression_mode,
            rank=rank,
        )

        self.index.append(e)
        entry = self.index.entry_binary(curr)
        if curr == 0 and self._docket is None:
            header = self._format_flags | self._format_version
            header = self.index.pack_header(header)
            entry = header + entry
        self._writeentry(
            transaction,
            entry,
            deltainfo.data,
            link,
            offset,
            serialized_sidedata,
            sidedata_offset,
        )

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo)

        if type(rawtext) == bytes:  # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return curr

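    # The rank computed above is the size of a revision's ancestor set,
    # itself included: linear nodes extend a parent's rank by one, while the
    # merge branch adds fast_rank(pmax) + 1 + the ancestors of pmin missing
    # from pmax's ancestors. A throwaway illustration on a four-node DAG,
    # with -1 standing in for nullrev:
    def toy_rank(parents, rev):
        seen = set()
        stack = [rev]
        while stack:
            r = stack.pop()
            if r != -1 and r not in seen:
                seen.add(r)
                stack.extend(parents[r])
        return len(seen)

    parents = {0: (-1, -1), 1: (0, -1), 2: (0, -1), 3: (1, 2)}
    assert toy_rank(parents, 1) == 2  # linear: 1 + rank of the parent
    assert toy_rank(parents, 3) == 4  # merge: {0, 1, 2, 3}
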
    def _get_data_offset(self, prev):
        """Returns the current offset in the (in-transaction) data file.

        Versions < 2 of the revlog can get this in O(1), revlog v2 needs a
        docket file to store that information: since sidedata can be rewritten
        to the end of the data file within a transaction, you can have cases
        where, for example, rev `n` does not have sidedata while rev `n - 1`
        does, leading to `n - 1`'s sidedata being written after `n`'s data.

        TODO cache this in a docket file before getting out of experimental."""
        if self._docket is None:
            return self.end(prev)
        else:
            return self._docket.data_end

    def _writeentry(
        self, transaction, entry, data, link, offset, sidedata, sidedata_offset
    ):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)
        ifh, dfh, sdfh = self._writinghandles
        if self._docket is None:
            ifh.seek(0, os.SEEK_END)
        else:
            ifh.seek(self._docket.index_end, os.SEEK_SET)
        if dfh:
            if self._docket is None:
                dfh.seek(0, os.SEEK_END)
            else:
                dfh.seek(self._docket.data_end, os.SEEK_SET)
        if sdfh:
            sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self._datafile, offset)
            if self._sidedatafile:
                transaction.add(self._sidedatafile, sidedata_offset)
            transaction.add(self._indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            if sidedata:
                sdfh.write(sidedata)
            ifh.write(entry)
        else:
            offset += curr * self.index.entry_size
            transaction.add(self._indexfile, offset)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            assert not sidedata
            self._enforceinlinesize(transaction)
        if self._docket is not None:
            # revlog-v2 always has 3 writing handles, help Pytype
            wh1 = self._writinghandles[0]
            wh2 = self._writinghandles[1]
            wh3 = self._writinghandles[2]
            assert wh1 is not None
            assert wh2 is not None
            assert wh3 is not None
            self._docket.index_end = wh1.tell()
            self._docket.data_end = wh2.tell()
            self._docket.sidedata_end = wh3.tell()

        nodemaputil.setup_persistent_nodemap(transaction, self)

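    # The seek-before-write workaround above, reduced to its essence: a
    # handle shared between reads and writes must be repositioned to EOF
    # before appending, since 'a+' semantics are not reliable everywhere.
    # Self-contained demonstration:
    import os
    import tempfile

    with tempfile.TemporaryFile('w+b') as fh:
        fh.write(b'abc')
        fh.seek(0)  # a read elsewhere moved the position...
        fh.seek(0, os.SEEK_END)  # ...so explicitly seek to EOF first
        fh.write(b'def')
        fh.seek(0)
        assert fh.read() == b'abcdef'
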
    def addgroup(
        self,
        deltas,
        linkmapper,
        transaction,
        alwayscache=False,
        addrevisioncb=None,
        duplicaterevisioncb=None,
        debug_info=None,
        delta_base_reuse_policy=None,
    ):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        if self._adding_group:
            raise error.ProgrammingError(b'cannot nest addgroup() calls')

        # read the default delta-base reuse policy from revlog config if the
        # group did not specify one.
        if delta_base_reuse_policy is None:
            if self._generaldelta and self._lazydeltabase:
                delta_base_reuse_policy = DELTA_BASE_REUSE_TRY
            else:
                delta_base_reuse_policy = DELTA_BASE_REUSE_NO

        self._adding_group = True
        empty = True
        try:
            with self._writing(transaction):
                write_debug = None
                if self._debug_delta:
                    write_debug = transaction._report
                deltacomputer = deltautil.deltacomputer(
                    self,
                    write_debug=write_debug,
                    debug_info=debug_info,
                )
                # loop through our set of deltas
                for data in deltas:
                    (
                        node,
                        p1,
                        p2,
                        linknode,
                        deltabase,
                        delta,
                        flags,
                        sidedata,
                    ) = data
                    link = linkmapper(linknode)
                    flags = flags or REVIDX_DEFAULT_FLAGS

                    rev = self.index.get_rev(node)
                    if rev is not None:
                        # this can happen if two branches make the same change
                        self._nodeduplicatecallback(transaction, rev)
                        if duplicaterevisioncb:
                            duplicaterevisioncb(self, rev)
                        empty = False
                        continue

                    for p in (p1, p2):
                        if not self.index.has_node(p):
                            raise error.LookupError(
                                p, self.radix, _(b'unknown parent')
                            )

                    if not self.index.has_node(deltabase):
                        raise error.LookupError(
                            deltabase, self.display_id, _(b'unknown delta base')
                        )

                    baserev = self.rev(deltabase)

                    if baserev != nullrev and self.iscensored(baserev):
                        # if base is censored, delta must be full replacement
                        # in a single patch operation
                        hlen = struct.calcsize(b">lll")
                        oldlen = self.rawsize(baserev)
                        newlen = len(delta) - hlen
                        if delta[:hlen] != mdiff.replacediffheader(
                            oldlen, newlen
                        ):
                            raise error.CensoredBaseError(
                                self.display_id, self.node(baserev)
                            )

                    if not flags and self._peek_iscensored(baserev, delta):
                        flags |= REVIDX_ISCENSORED

                    # We assume consumers of addrevisioncb will want to
                    # retrieve the added revision, which will require a call
                    # to revision(). revision() will fast path if there is a
                    # cache hit. So, we tell _addrevision() to always cache in
                    # this case.
                    # We're only using addgroup() in the context of changegroup
                    # generation so the revision data can always be handled as
                    # raw by the flagprocessor.
                    rev = self._addrevision(
                        node,
                        None,
                        transaction,
                        link,
                        p1,
                        p2,
                        flags,
                        (baserev, delta, delta_base_reuse_policy),
                        alwayscache=alwayscache,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

                    if addrevisioncb:
                        addrevisioncb(self, rev)
                    empty = False
        finally:
            self._adding_group = False
        return not empty

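    # Shape of one element of ``deltas``, for reference. Everything below is
    # a hypothetical stand-in (``node``, ``p1``, ``p2``, ``linknode``,
    # ``nullid``, ``clog``, ``some_filelog``, ``tr`` are assumed to exist in
    # the caller's scope); the snapshot helper assumes the ">lll"
    # (start, end, new length) patch-block header used by mdiff:
    import struct

    def full_snapshot_delta(text):
        # a delta against the (empty) null revision is simply a full snapshot:
        # it replaces the empty range [0, 0) of the base with ``text``.
        return struct.pack(b'>lll', 0, 0, len(text)) + text

    def linkmapper(linknode):
        return clog.rev(linknode)  # map a changelog node to a changelog rev

    deltas = [
        # (node, p1, p2, linknode, deltabase, delta, flags, sidedata)
        (node, p1, p2, linknode, nullid, full_snapshot_delta(b'content'), 0, {}),
    ]
    # some_filelog.addgroup(deltas, linkmapper, tr) would then append the lot.
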
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

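    # What the censored-base guard in addgroup() accepts: exactly one patch
    # block rewriting the whole base, i.e. the same bytes
    # mdiff.replacediffheader(oldlen, newlen) produces. Sketch:
    import struct

    oldlen, tombstone = 11, b'replacement text'
    delta = struct.pack(b'>lll', 0, oldlen, len(tombstone)) + tombstone
    hlen = struct.calcsize(b'>lll')
    assert delta[:hlen] == struct.pack(b'>lll', 0, oldlen, len(tombstone))
    assert len(delta) - hlen == len(tombstone)
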
    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(
            minlink,
            len(self) - 1,
            self.headrevs(),
            self.linkrev,
            self.parentrevs,
        )

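    # Typical caller pattern; ``rl``, ``minlink`` and ``tr`` are assumed to
    # be an open revlog, a minimum linkrev and a running transaction:
    strippoint, broken = rl.getstrippoint(minlink)
    if strippoint < len(rl):
        # ``broken`` holds revs whose linkrevs the strip will invalidate;
        # callers save those revisions before truncating (see strip() below).
        rl.strip(minlink, tr)
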
    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        data_end = self.start(rev)
        if not self._inline:
            transaction.add(self._datafile, data_end)
            end = rev * self.index.entry_size
        else:
            end = data_end + (rev * self.index.entry_size)

        if self._sidedatafile:
            sidedata_end = self.sidedata_cut_off(rev)
            transaction.add(self._sidedatafile, sidedata_end)

        transaction.add(self._indexfile, end)
        if self._docket is not None:
            # XXX we could leverage the docket while stripping. However it is
            # not powerful enough at the time of this comment
            self._docket.index_end = end
            self._docket.data_end = data_end
            self._docket.sidedata_end = sidedata_end
            self._docket.write(transaction, stripping=True)

        # then reset internal state in memory to forget those revisions
        self._revisioncache = None
        self._chaininfocache = util.lrucachedict(500)
        self._segmentfile.clear_cache()
        self._segmentfile_sidedata.clear_cache()

        del self.index[rev:-1]

    def checksize(self):
        """Check size of index and data files

        return a (dd, di) tuple.
        - dd: extra bytes for the "data" file
        - di: extra bytes for the "index" file

        A healthy revlog will return (0, 0).
        """
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, io.SEEK_END)
                actual = f.tell()
            dd = actual - expected
        except FileNotFoundError:
            dd = 0

        try:
            f = self.opener(self._indexfile)
            f.seek(0, io.SEEK_END)
            actual = f.tell()
            f.close()
            s = self.index.entry_size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except FileNotFoundError:
            di = 0

        return (dd, di)

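    # A healthy revlog reports no stray bytes; ``rl`` assumed as above:
    dd, di = rl.checksize()
    if (dd, di) != (0, 0):
        print('revlog damaged: %d extra data bytes, %d extra index bytes'
              % (dd, di))
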
    def files(self):
        res = [self._indexfile]
        if self._docket_file is None:
            if not self._inline:
                res.append(self._datafile)
        else:
            res.append(self._docket_file)
            res.extend(self._docket.old_index_filepaths(include_empty=False))
            if self._docket.data_end:
                res.append(self._datafile)
            res.extend(self._docket.old_data_filepaths(include_empty=False))
            if self._docket.sidedata_end:
                res.append(self._sidedatafile)
            res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
        return res

    def emitrevisions(
        self,
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltamode=repository.CG_DELTAMODE_STD,
        sidedata_helpers=None,
        debug_info=None,
    ):
        if nodesorder not in (b'nodes', b'storage', b'linear', None):
            raise error.ProgrammingError(
                b'unhandled value for nodesorder: %s' % nodesorder
            )

        if nodesorder is None and not self._generaldelta:
            nodesorder = b'storage'

        if (
            not self._storedeltachains
            and deltamode != repository.CG_DELTAMODE_PREV
        ):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self,
            nodes,
            nodesorder,
            revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self._candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
            sidedata_helpers=sidedata_helpers,
            debug_info=debug_info,
        )

    DELTAREUSEALWAYS = b'always'
    DELTAREUSESAMEREVS = b'samerevs'
    DELTAREUSENEVER = b'never'

    DELTAREUSEFULLADD = b'fulladd'

    DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}

    def clone(
        self,
        tr,
        destrevlog,
        addrevisioncb=None,
        deltareuse=DELTAREUSESAMEREVS,
        forcedeltabothparents=None,
        sidedata_helpers=None,
    ):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta
        encoding differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is
          the fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).
        DELTAREUSEFULLADD
          Revisions will be re-added as if they were new content. This is
          slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
          e.g. large file detection and handling.

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if
        the delta could choose a better revision, it will do so. This means if
        you are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``forcedeltabothparents``
        argument controls whether to force compute deltas against both parents
        for merges. If None, the destination revlog's existing setting is
        used.

        See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
        `sidedata_helpers`.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(
                _(b'value for deltareuse invalid: %s') % deltareuse
            )

        if len(destrevlog):
            raise ValueError(_(b'destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_(b'source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_(b'destination revlog has filtered revisions'))

        # lazydelta and lazydeltabase control whether to reuse a cached delta,
        # if possible.
-        oldlazydelta = destrevlog._lazydelta
-        oldlazydeltabase = destrevlog._lazydeltabase
-        oldamd = destrevlog._deltabothparents
+        old_delta_config = destrevlog.delta_config
+        destrevlog.delta_config = destrevlog.delta_config.copy()

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog.delta_config.lazy_delta_base = True
                destrevlog.delta_config.lazy_delta = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog.delta_config.lazy_delta_base = False
                destrevlog.delta_config.lazy_delta = True
            elif deltareuse == self.DELTAREUSENEVER:
                destrevlog.delta_config.lazy_delta_base = False
                destrevlog.delta_config.lazy_delta = False

-            delta_both_parents = forcedeltabothparents or oldamd
+            delta_both_parents = (
+                forcedeltabothparents or old_delta_config.delta_both_parents
+            )
            destrevlog.delta_config.delta_both_parents = delta_both_parents

            with self.reading():
                self._clone(
                    tr,
                    destrevlog,
                    addrevisioncb,
                    deltareuse,
                    forcedeltabothparents,
                    sidedata_helpers,
                )

        finally:
-            destrevlog.delta_config.lazy_delta = oldlazydelta
-            destrevlog.delta_config.lazy_delta_base = oldlazydeltabase
-            destrevlog.delta_config.delta_both_parents = oldamd
+            destrevlog.delta_config = old_delta_config

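    # The pattern this revision settles on: snapshot the whole config object,
    # mutate a private copy, and restore the original in one assignment --
    # instead of saving and restoring each attribute by hand, where a newly
    # added field is easy to miss. Sketch, with ``destrevlog`` assumed as
    # above:
    old_config = destrevlog.delta_config
    destrevlog.delta_config = destrevlog.delta_config.copy()
    try:
        destrevlog.delta_config.lazy_delta = True  # any number of tweaks
    finally:
        destrevlog.delta_config = old_config  # single, field-proof restore
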
    def _clone(
        self,
        tr,
        destrevlog,
        addrevisioncb,
        deltareuse,
        forcedeltabothparents,
        sidedata_helpers,
    ):
        """perform the core duty of `revlog.clone` after parameter processing"""
        write_debug = None
        if self._debug_delta:
            write_debug = tr._report
        deltacomputer = deltautil.deltacomputer(
            destrevlog,
            write_debug=write_debug,
        )
        index = self.index
        for rev in self:
            entry = index[rev]

            # Some classes override linkrev to take filtered revs into
            # account. Use raw entry from index.
            flags = entry[0] & 0xFFFF
            linkrev = entry[4]
            p1 = index[entry[5]][7]
            p2 = index[entry[6]][7]
            node = entry[7]

            # (Possibly) reuse the delta from the revlog if allowed and
            # the revlog chunk is a delta.
            cachedelta = None
            rawtext = None
            if deltareuse == self.DELTAREUSEFULLADD:
                text = self._revisiondata(rev)
                sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                destrevlog.addrevision(
                    text,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    cachedelta=cachedelta,
                    node=node,
                    flags=flags,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )
            else:
                if destrevlog._lazydelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                sidedata = None
                if not cachedelta:
                    rawtext = self._revisiondata(rev)
                    sidedata = self.sidedata(rev)
                if sidedata is None:
                    sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                with destrevlog._writing(tr):
                    destrevlog._addrevision(
                        node,
                        rawtext,
                        tr,
                        linkrev,
                        p1,
                        p2,
                        flags,
                        cachedelta,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

            if addrevisioncb:
                addrevisioncb(self, rev, node)

3502 def censorrevision(self, tr, censornode, tombstone=b''):
3511 def censorrevision(self, tr, censornode, tombstone=b''):
3503 if self._format_version == REVLOGV0:
3512 if self._format_version == REVLOGV0:
3504 raise error.RevlogError(
3513 raise error.RevlogError(
3505 _(b'cannot censor with version %d revlogs')
3514 _(b'cannot censor with version %d revlogs')
3506 % self._format_version
3515 % self._format_version
3507 )
3516 )
3508 elif self._format_version == REVLOGV1:
3517 elif self._format_version == REVLOGV1:
3509 rewrite.v1_censor(self, tr, censornode, tombstone)
3518 rewrite.v1_censor(self, tr, censornode, tombstone)
3510 else:
3519 else:
3511 rewrite.v2_censor(self, tr, censornode, tombstone)
3520 rewrite.v2_censor(self, tr, censornode, tombstone)
3512
3521
3513 def verifyintegrity(self, state):
3522 def verifyintegrity(self, state):
3514 """Verifies the integrity of the revlog.
3523 """Verifies the integrity of the revlog.
3515
3524
3516 Yields ``revlogproblem`` instances describing problems that are
3525 Yields ``revlogproblem`` instances describing problems that are
3517 found.
3526 found.
3518 """
3527 """
3519 dd, di = self.checksize()
3528 dd, di = self.checksize()
3520 if dd:
3529 if dd:
3521 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3530 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3522 if di:
3531 if di:
3523 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3532 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3524
3533
3525 version = self._format_version
3534 version = self._format_version
3526
3535
3527 # The verifier tells us what version revlog we should be.
3536 # The verifier tells us what version revlog we should be.
3528 if version != state[b'expectedversion']:
3537 if version != state[b'expectedversion']:
3529 yield revlogproblem(
3538 yield revlogproblem(
3530 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3539 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3531 % (self.display_id, version, state[b'expectedversion'])
3540 % (self.display_id, version, state[b'expectedversion'])
3532 )
3541 )
3533
3542
3534 state[b'skipread'] = set()
3543 state[b'skipread'] = set()
3535 state[b'safe_renamed'] = set()
3544 state[b'safe_renamed'] = set()
3536
3545
3537 for rev in self:
3546 for rev in self:
3538 node = self.node(rev)
3547 node = self.node(rev)
3539
3548
3540 # Verify contents. 4 cases to care about:
3549 # Verify contents. 4 cases to care about:
3541 #
3550 #
3542 # common: the most common case
3551 # common: the most common case
3543 # rename: with a rename
3552 # rename: with a rename
3544 # meta: file content starts with b'\1\n', the metadata
3553 # meta: file content starts with b'\1\n', the metadata
3545 # header defined in filelog.py, but without a rename
3554 # header defined in filelog.py, but without a rename
3546 # ext: content stored externally
3555 # ext: content stored externally
3547 #
3556 #
3548 # More formally, their differences are shown below:
3557 # More formally, their differences are shown below:
3549 #
3558 #
3550 # | common | rename | meta | ext
3559 # | common | rename | meta | ext
3551 # -------------------------------------------------------
3560 # -------------------------------------------------------
3552 # flags() | 0 | 0 | 0 | not 0
3561 # flags() | 0 | 0 | 0 | not 0
3553 # renamed() | False | True | False | ?
3562 # renamed() | False | True | False | ?
3554 # rawtext[0:2]=='\1\n'| False | True | True | ?
3563 # rawtext[0:2]=='\1\n'| False | True | True | ?
3555 #
3564 #
3556 # "rawtext" means the raw text stored in revlog data, which
3565 # "rawtext" means the raw text stored in revlog data, which
3557 # could be retrieved by "rawdata(rev)". "text"
3566 # could be retrieved by "rawdata(rev)". "text"
3558 # mentioned below is "revision(rev)".
3567 # mentioned below is "revision(rev)".
3559 #
3568 #
3560 # There are 3 different lengths stored physically:
3569 # There are 3 different lengths stored physically:
3561 # 1. L1: rawsize, stored in revlog index
3570 # 1. L1: rawsize, stored in revlog index
3562 # 2. L2: len(rawtext), stored in revlog data
3571 # 2. L2: len(rawtext), stored in revlog data
3563 # 3. L3: len(text), stored in revlog data if flags==0, or
3572 # 3. L3: len(text), stored in revlog data if flags==0, or
3564 # possibly somewhere else if flags!=0
3573 # possibly somewhere else if flags!=0
3565 #
3574 #
3566 # L1 should be equal to L2. L3 could be different from them.
3575 # L1 should be equal to L2. L3 could be different from them.
3567 # "text" may or may not affect commit hash depending on flag
3576 # "text" may or may not affect commit hash depending on flag
3568 # processors (see flagutil.addflagprocessor).
3577 # processors (see flagutil.addflagprocessor).
3569 #
3578 #
3570 # | common | rename | meta | ext
3579 # | common | rename | meta | ext
3571 # -------------------------------------------------
3580 # -------------------------------------------------
3572 # rawsize() | L1 | L1 | L1 | L1
3581 # rawsize() | L1 | L1 | L1 | L1
3573 # size() | L1 | L2-LM | L1(*) | L1 (?)
3582 # size() | L1 | L2-LM | L1(*) | L1 (?)
3574 # len(rawtext) | L2 | L2 | L2 | L2
3583 # len(rawtext) | L2 | L2 | L2 | L2
3575 # len(text) | L2 | L2 | L2 | L3
3584 # len(text) | L2 | L2 | L2 | L3
3576 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3585 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3577 #
3586 #
3578 # LM: length of metadata, depending on rawtext
3587 # LM: length of metadata, depending on rawtext
3579 # (*): not ideal, see comment in filelog.size
3588 # (*): not ideal, see comment in filelog.size
3580 # (?): could be "- len(meta)" if the resolved content has
3589 # (?): could be "- len(meta)" if the resolved content has
3581 # rename metadata
3590 # rename metadata
3582 #
3591 #
3583 # Checks needed to be done:
3592 # Checks needed to be done:
3584 # 1. length check: L1 == L2, in all cases.
3593 # 1. length check: L1 == L2, in all cases.
3585 # 2. hash check: depending on flag processor, we may need to
3594 # 2. hash check: depending on flag processor, we may need to
3586 # use either "text" (external), or "rawtext" (in revlog).
3595 # use either "text" (external), or "rawtext" (in revlog).
3587
3596
3588 try:
3597 try:
3589 skipflags = state.get(b'skipflags', 0)
3598 skipflags = state.get(b'skipflags', 0)
3590 if skipflags:
3599 if skipflags:
3591 skipflags &= self.flags(rev)
3600 skipflags &= self.flags(rev)
3592
3601
3593 _verify_revision(self, skipflags, state, node)
3602 _verify_revision(self, skipflags, state, node)
3594
3603
3595 l1 = self.rawsize(rev)
3604 l1 = self.rawsize(rev)
3596 l2 = len(self.rawdata(node))
3605 l2 = len(self.rawdata(node))
3597
3606
3598 if l1 != l2:
3607 if l1 != l2:
3599 yield revlogproblem(
3608 yield revlogproblem(
3600 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3609 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3601 node=node,
3610 node=node,
3602 )
3611 )
3603
3612
3604 except error.CensoredNodeError:
3613 except error.CensoredNodeError:
3605 if state[b'erroroncensored']:
3614 if state[b'erroroncensored']:
3606 yield revlogproblem(
3615 yield revlogproblem(
3607 error=_(b'censored file data'), node=node
3616 error=_(b'censored file data'), node=node
3608 )
3617 )
3609 state[b'skipread'].add(node)
3618 state[b'skipread'].add(node)
3610 except Exception as e:
3619 except Exception as e:
3611 yield revlogproblem(
3620 yield revlogproblem(
3612 error=_(b'unpacking %s: %s')
3621 error=_(b'unpacking %s: %s')
3613 % (short(node), stringutil.forcebytestr(e)),
3622 % (short(node), stringutil.forcebytestr(e)),
3614 node=node,
3623 node=node,
3615 )
3624 )
3616 state[b'skipread'].add(node)
3625 state[b'skipread'].add(node)
3617
3626
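Note for reviewers: the L1 == L2 invariant checked above is also observable
through the public accessors alone. A minimal sketch, assuming an open revlog
instance named "rlog" (the name is illustrative):

    # L1: rawsize as recorded in the index; L2: length of the stored rawtext
    for rev in rlog:
        l1 = rlog.rawsize(rev)
        l2 = len(rlog.rawdata(rev))
        assert l1 == l2, 'rev %d: unpacked size is %d, %d expected' % (rev, l2, l1)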
3618 def storageinfo(
3627 def storageinfo(
3619 self,
3628 self,
3620 exclusivefiles=False,
3629 exclusivefiles=False,
3621 sharedfiles=False,
3630 sharedfiles=False,
3622 revisionscount=False,
3631 revisionscount=False,
3623 trackedsize=False,
3632 trackedsize=False,
3624 storedsize=False,
3633 storedsize=False,
3625 ):
3634 ):
3626 d = {}
3635 d = {}
3627
3636
3628 if exclusivefiles:
3637 if exclusivefiles:
3629 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
3638 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
3630 if not self._inline:
3639 if not self._inline:
3631 d[b'exclusivefiles'].append((self.opener, self._datafile))
3640 d[b'exclusivefiles'].append((self.opener, self._datafile))
3632
3641
3633 if sharedfiles:
3642 if sharedfiles:
3634 d[b'sharedfiles'] = []
3643 d[b'sharedfiles'] = []
3635
3644
3636 if revisionscount:
3645 if revisionscount:
3637 d[b'revisionscount'] = len(self)
3646 d[b'revisionscount'] = len(self)
3638
3647
3639 if trackedsize:
3648 if trackedsize:
3640 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3649 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3641
3650
3642 if storedsize:
3651 if storedsize:
3643 d[b'storedsize'] = sum(
3652 d[b'storedsize'] = sum(
3644 self.opener.stat(path).st_size for path in self.files()
3653 self.opener.stat(path).st_size for path in self.files()
3645 )
3654 )
3646
3655
3647 return d
3656 return d
3648
3657
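A hedged usage sketch for storageinfo, assuming an open revlog instance named
"rlog": callers opt into only the fields they need and read them back from the
returned dict (keys are bytes, matching the code below).

    info = rlog.storageinfo(revisionscount=True, trackedsize=True)
    print(b'%d revisions, %d tracked bytes'
          % (info[b'revisionscount'], info[b'trackedsize']))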
3649 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
3658 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
3650 if not self.hassidedata:
3659 if not self.hassidedata:
3651 return
3660 return
3652 # revlog formats with sidedata support do not support inline data
3661 # revlog formats with sidedata support do not support inline data
3653 assert not self._inline
3662 assert not self._inline
3654 if not helpers[1] and not helpers[2]:
3663 if not helpers[1] and not helpers[2]:
3655 # Nothing to generate or remove
3664 # Nothing to generate or remove
3656 return
3665 return
3657
3666
3658 new_entries = []
3667 new_entries = []
3659 # append the new sidedata
3668 # append the new sidedata
3660 with self._writing(transaction):
3669 with self._writing(transaction):
3661 ifh, dfh, sdfh = self._writinghandles
3670 ifh, dfh, sdfh = self._writinghandles
3662 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
3671 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
3663
3672
3664 current_offset = sdfh.tell()
3673 current_offset = sdfh.tell()
3665 for rev in range(startrev, endrev + 1):
3674 for rev in range(startrev, endrev + 1):
3666 entry = self.index[rev]
3675 entry = self.index[rev]
3667 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3676 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3668 store=self,
3677 store=self,
3669 sidedata_helpers=helpers,
3678 sidedata_helpers=helpers,
3670 sidedata={},
3679 sidedata={},
3671 rev=rev,
3680 rev=rev,
3672 )
3681 )
3673
3682
3674 serialized_sidedata = sidedatautil.serialize_sidedata(
3683 serialized_sidedata = sidedatautil.serialize_sidedata(
3675 new_sidedata
3684 new_sidedata
3676 )
3685 )
3677
3686
3678 sidedata_compression_mode = COMP_MODE_INLINE
3687 sidedata_compression_mode = COMP_MODE_INLINE
3679 if serialized_sidedata and self.hassidedata:
3688 if serialized_sidedata and self.hassidedata:
3680 sidedata_compression_mode = COMP_MODE_PLAIN
3689 sidedata_compression_mode = COMP_MODE_PLAIN
3681 h, comp_sidedata = self.compress(serialized_sidedata)
3690 h, comp_sidedata = self.compress(serialized_sidedata)
3682 if (
3691 if (
3683 h != b'u'
3692 h != b'u'
3684 and comp_sidedata[0] != b'\0'
3693 and comp_sidedata[0] != b'\0'
3685 and len(comp_sidedata) < len(serialized_sidedata)
3694 and len(comp_sidedata) < len(serialized_sidedata)
3686 ):
3695 ):
3687 assert not h
3696 assert not h
3688 if (
3697 if (
3689 comp_sidedata[0]
3698 comp_sidedata[0]
3690 == self._docket.default_compression_header
3699 == self._docket.default_compression_header
3691 ):
3700 ):
3692 sidedata_compression_mode = COMP_MODE_DEFAULT
3701 sidedata_compression_mode = COMP_MODE_DEFAULT
3693 serialized_sidedata = comp_sidedata
3702 serialized_sidedata = comp_sidedata
3694 else:
3703 else:
3695 sidedata_compression_mode = COMP_MODE_INLINE
3704 sidedata_compression_mode = COMP_MODE_INLINE
3696 serialized_sidedata = comp_sidedata
3705 serialized_sidedata = comp_sidedata
3697 if entry[8] != 0 or entry[9] != 0:
3706 if entry[8] != 0 or entry[9] != 0:
3698 # rewriting entries that already have sidedata is not
3707 # rewriting entries that already have sidedata is not
3699 # supported yet, because it introduces garbage data in the
3708 # supported yet, because it introduces garbage data in the
3700 # revlog.
3709 # revlog.
3701 msg = b"rewriting existing sidedata is not supported yet"
3710 msg = b"rewriting existing sidedata is not supported yet"
3702 raise error.Abort(msg)
3711 raise error.Abort(msg)
3703
3712
3704 # Apply (potential) flags to add and to remove after running
3713 # Apply (potential) flags to add and to remove after running
3705 # the sidedata helpers
3714 # the sidedata helpers
3706 new_offset_flags = entry[0] | flags[0] & ~flags[1]
3715 new_offset_flags = entry[0] | flags[0] & ~flags[1]
3707 entry_update = (
3716 entry_update = (
3708 current_offset,
3717 current_offset,
3709 len(serialized_sidedata),
3718 len(serialized_sidedata),
3710 new_offset_flags,
3719 new_offset_flags,
3711 sidedata_compression_mode,
3720 sidedata_compression_mode,
3712 )
3721 )
3713
3722
3714 # the sidedata computation might have moved the file cursors around
3723 # the sidedata computation might have moved the file cursors around
3715 sdfh.seek(current_offset, os.SEEK_SET)
3724 sdfh.seek(current_offset, os.SEEK_SET)
3716 sdfh.write(serialized_sidedata)
3725 sdfh.write(serialized_sidedata)
3717 new_entries.append(entry_update)
3726 new_entries.append(entry_update)
3718 current_offset += len(serialized_sidedata)
3727 current_offset += len(serialized_sidedata)
3719 self._docket.sidedata_end = sdfh.tell()
3728 self._docket.sidedata_end = sdfh.tell()
3720
3729
3721 # rewrite the new index entries
3730 # rewrite the new index entries
3722 ifh.seek(startrev * self.index.entry_size)
3731 ifh.seek(startrev * self.index.entry_size)
3723 for i, e in enumerate(new_entries):
3732 for i, e in enumerate(new_entries):
3724 rev = startrev + i
3733 rev = startrev + i
3725 self.index.replace_sidedata_info(rev, *e)
3734 self.index.replace_sidedata_info(rev, *e)
3726 packed = self.index.entry_binary(rev)
3735 packed = self.index.entry_binary(rev)
3727 if rev == 0 and self._docket is None:
3736 if rev == 0 and self._docket is None:
3728 header = self._format_flags | self._format_version
3737 header = self._format_flags | self._format_version
3729 header = self.index.pack_header(header)
3738 header = self.index.pack_header(header)
3730 packed = header + packed
3739 packed = header + packed
3731 ifh.write(packed)
3740 ifh.write(packed)
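One subtlety in the hunk above is the flag composition
"entry[0] | flags[0] & ~flags[1]": since "&" binds tighter than "|", the
removal mask only filters the bits being added, never the flags already on
the entry. A self-contained illustration (the flag values are made up for the
example):

    FLAG_A = 1 << 0
    FLAG_B = 1 << 1
    entry_flags = FLAG_A                  # flags already stored on the entry
    to_add, to_remove = FLAG_B, FLAG_A    # what the sidedata helpers returned
    new_flags = entry_flags | to_add & ~to_remove
    # FLAG_A survives: the removal mask applied only to the added bits
    assert new_flags == FLAG_A | FLAG_B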
@@ -1,525 +1,528 b''
1 # test revlog interaction about raw data (flagprocessor)
1 # test revlog interaction about raw data (flagprocessor)
2
2
3
3
4 import hashlib
4 import hashlib
5 import sys
5 import sys
6
6
7 from mercurial import (
7 from mercurial import (
8 encoding,
8 encoding,
9 revlog,
9 revlog,
10 transaction,
10 transaction,
11 vfs,
11 vfs,
12 )
12 )
13
13
14 from mercurial.revlogutils import (
14 from mercurial.revlogutils import (
15 constants,
15 constants,
16 deltas,
16 deltas,
17 flagutil,
17 flagutil,
18 )
18 )
19
19
20
20
21 class _NoTransaction:
21 class _NoTransaction:
22 """transaction like object to update the nodemap outside a transaction"""
22 """transaction like object to update the nodemap outside a transaction"""
23
23
24 def __init__(self):
24 def __init__(self):
25 self._postclose = {}
25 self._postclose = {}
26
26
27 def addpostclose(self, callback_id, callback_func):
27 def addpostclose(self, callback_id, callback_func):
28 self._postclose[callback_id] = callback_func
28 self._postclose[callback_id] = callback_func
29
29
30 def registertmp(self, *args, **kwargs):
30 def registertmp(self, *args, **kwargs):
31 pass
31 pass
32
32
33 def addbackup(self, *args, **kwargs):
33 def addbackup(self, *args, **kwargs):
34 pass
34 pass
35
35
36 def add(self, *args, **kwargs):
36 def add(self, *args, **kwargs):
37 pass
37 pass
38
38
39 def addabort(self, *args, **kwargs):
39 def addabort(self, *args, **kwargs):
40 pass
40 pass
41
41
42 def _report(self, *args):
42 def _report(self, *args):
43 pass
43 pass
44
44
45
45
46 # TESTTMP is optional. This makes it convenient to run without run-tests.py
46 # TESTTMP is optional. This makes it convenient to run without run-tests.py
47 tvfs = vfs.vfs(encoding.environ.get(b'TESTTMP', b'/tmp'))
47 tvfs = vfs.vfs(encoding.environ.get(b'TESTTMP', b'/tmp'))
48
48
49 # Enable generaldelta, otherwise revlog won't use deltas as expected by the test
49 # Enable generaldelta, otherwise revlog won't use deltas as expected by the test
50 tvfs.options = {
50 tvfs.options = {
51 b'generaldelta': True,
51 b'generaldelta': True,
52 b'revlogv1': True,
52 b'revlogv1': True,
53 b'sparse-revlog': True,
53 b'sparse-revlog': True,
54 }
54 }
55
55
56
56
57 def abort(msg):
57 def abort(msg):
58 print('abort: %s' % msg)
58 print('abort: %s' % msg)
59 # Return 0 so run-tests.py could compare the output.
59 # Return 0 so run-tests.py could compare the output.
60 sys.exit()
60 sys.exit()
61
61
62
62
63 # Register a revlog processor for flag EXTSTORED.
63 # Register a revlog processor for flag EXTSTORED.
64 #
64 #
65 # It simply prepends a fixed header, and replaces '1' with 'i'. So it has
65 # It simply prepends a fixed header, and replaces '1' with 'i'. So it has
66 # insertion and replacement, and may be useful for testing revlog's line-based
66 # insertion and replacement, and may be useful for testing revlog's line-based
67 # deltas.
67 # deltas.
68 _extheader = b'E\n'
68 _extheader = b'E\n'
69
69
70
70
71 def readprocessor(self, rawtext):
71 def readprocessor(self, rawtext):
72 # True: the returned text could be used to verify hash
72 # True: the returned text could be used to verify hash
73 text = rawtext[len(_extheader) :].replace(b'i', b'1')
73 text = rawtext[len(_extheader) :].replace(b'i', b'1')
74 return text, True
74 return text, True
75
75
76
76
77 def writeprocessor(self, text):
77 def writeprocessor(self, text):
78 # False: the returned rawtext shouldn't be used to verify hash
78 # False: the returned rawtext shouldn't be used to verify hash
79 rawtext = _extheader + text.replace(b'1', b'i')
79 rawtext = _extheader + text.replace(b'1', b'i')
80 return rawtext, False
80 return rawtext, False
81
81
82
82
83 def rawprocessor(self, rawtext):
83 def rawprocessor(self, rawtext):
84 # False: do not verify hash. Only the content returned by "readprocessor"
84 # False: do not verify hash. Only the content returned by "readprocessor"
85 # can be used to verify hash.
85 # can be used to verify hash.
86 return False
86 return False
87
87
88
88
89 flagutil.addflagprocessor(
89 flagutil.addflagprocessor(
90 revlog.REVIDX_EXTSTORED, (readprocessor, writeprocessor, rawprocessor)
90 revlog.REVIDX_EXTSTORED, (readprocessor, writeprocessor, rawprocessor)
91 )
91 )
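The processor triple forms a write/read pair plus a raw-text validator, and
for any text the write-then-read path must round-trip. A quick sanity sketch
using the functions defined above (the unused "self" argument is passed as
None purely for illustration, mirroring how the test calls writeprocessor
later on):

    rawtext, _ = writeprocessor(None, b'a1b\n')
    assert rawtext == _extheader + b'aib\n'
    text, validatehash = readprocessor(None, rawtext)
    assert text == b'a1b\n' and validatehash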
92
92
93 # Utilities about reading and appending revlog
93 # Utilities about reading and appending revlog
94
94
95
95
96 def newtransaction():
96 def newtransaction():
97 # A transaction is required to write revlogs
97 # A transaction is required to write revlogs
98 report = lambda msg: None
98 report = lambda msg: None
99 return transaction.transaction(report, tvfs, {'plain': tvfs}, b'journal')
99 return transaction.transaction(report, tvfs, {'plain': tvfs}, b'journal')
100
100
101
101
102 def newrevlog(name=b'_testrevlog', recreate=False):
102 def newrevlog(name=b'_testrevlog', recreate=False):
103 if recreate:
103 if recreate:
104 tvfs.tryunlink(name + b'.i')
104 tvfs.tryunlink(name + b'.i')
105 target = (constants.KIND_OTHER, b'test')
105 target = (constants.KIND_OTHER, b'test')
106 rlog = revlog.revlog(tvfs, target=target, radix=name)
106 rlog = revlog.revlog(tvfs, target=target, radix=name)
107 return rlog
107 return rlog
108
108
109
109
110 def appendrev(rlog, text, tr, isext=False, isdelta=True):
110 def appendrev(rlog, text, tr, isext=False, isdelta=True):
111 """Append a revision. If isext is True, set the EXTSTORED flag so flag
111 """Append a revision. If isext is True, set the EXTSTORED flag so flag
112 processor will be used (and rawtext is different from text). If isdelta is
112 processor will be used (and rawtext is different from text). If isdelta is
113 True, force the revision to be a delta, otherwise it's full text.
113 True, force the revision to be a delta, otherwise it's full text.
114 """
114 """
115 nextrev = len(rlog)
115 nextrev = len(rlog)
116 p1 = rlog.node(nextrev - 1)
116 p1 = rlog.node(nextrev - 1)
117 p2 = rlog.nullid
117 p2 = rlog.nullid
118 if isext:
118 if isext:
119 flags = revlog.REVIDX_EXTSTORED
119 flags = revlog.REVIDX_EXTSTORED
120 else:
120 else:
121 flags = revlog.REVIDX_DEFAULT_FLAGS
121 flags = revlog.REVIDX_DEFAULT_FLAGS
122 # Change storedeltachains temporarily, to override revlog's delta decision
122 # Change storedeltachains temporarily, to override revlog's delta decision
123 rlog._storedeltachains = isdelta
123 rlog._storedeltachains = isdelta
124 try:
124 try:
125 rlog.addrevision(text, tr, nextrev, p1, p2, flags=flags)
125 rlog.addrevision(text, tr, nextrev, p1, p2, flags=flags)
126 return nextrev
126 return nextrev
127 except Exception as ex:
127 except Exception as ex:
128 abort('rev %d: failed to append: %s' % (nextrev, ex))
128 abort('rev %d: failed to append: %s' % (nextrev, ex))
129 finally:
129 finally:
130 # Restore storedeltachains. It is always True, see revlog.__init__
130 # Restore storedeltachains. It is always True, see revlog.__init__
131 rlog._storedeltachains = True
131 rlog._storedeltachains = True
132
132
133
133
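A hedged usage sketch, assuming a transaction "tr" and a revlog "rlog" built
with the helpers above: appending a full-text EXTSTORED revision stores the
transformed rawtext while revision() still resolves the plain text.

    rev = appendrev(rlog, b'1\n', tr, isext=True, isdelta=False)
    assert rlog.revision(rev) == b'1\n'
    assert rlog.rawdata(rev) == _extheader + b'i\n'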
134 def addgroupcopy(rlog, tr, destname=b'_destrevlog', optimaldelta=True):
134 def addgroupcopy(rlog, tr, destname=b'_destrevlog', optimaldelta=True):
135 """Copy revlog to destname using revlog.addgroup. Return the copied revlog.
135 """Copy revlog to destname using revlog.addgroup. Return the copied revlog.
136
136
137 This emulates push or pull. They use changegroup. Changegroup requires a
137 This emulates push or pull. They use changegroup. Changegroup requires a
138 repo to work. We don't have a repo, so a dummy changegroup is used.
138 repo to work. We don't have a repo, so a dummy changegroup is used.
139
139
140 If optimaldelta is True, use an optimized delta parent, so the destination
140 If optimaldelta is True, use an optimized delta parent, so the destination
141 revlog can probably reuse it. Otherwise it builds a sub-optimal delta, and
141 revlog can probably reuse it. Otherwise it builds a sub-optimal delta, and
142 the destination revlog needs more work to use it.
142 the destination revlog needs more work to use it.
143
143
144 This exercises some revlog.addgroup (and revlog._addrevision(text=None))
144 This exercises some revlog.addgroup (and revlog._addrevision(text=None))
145 code path, which is not covered by "appendrev" alone.
145 code path, which is not covered by "appendrev" alone.
146 """
146 """
147
147
148 class dummychangegroup:
148 class dummychangegroup:
149 @staticmethod
149 @staticmethod
150 def deltachunk(pnode):
150 def deltachunk(pnode):
151 pnode = pnode or rlog.nullid
151 pnode = pnode or rlog.nullid
152 parentrev = rlog.rev(pnode)
152 parentrev = rlog.rev(pnode)
153 r = parentrev + 1
153 r = parentrev + 1
154 if r >= len(rlog):
154 if r >= len(rlog):
155 return {}
155 return {}
156 if optimaldelta:
156 if optimaldelta:
157 deltaparent = parentrev
157 deltaparent = parentrev
158 else:
158 else:
159 # suboptimal deltaparent
159 # suboptimal deltaparent
160 deltaparent = min(0, parentrev)
160 deltaparent = min(0, parentrev)
161 if not rlog._candelta(deltaparent, r):
161 if not rlog._candelta(deltaparent, r):
162 deltaparent = -1
162 deltaparent = -1
163 return {
163 return {
164 b'node': rlog.node(r),
164 b'node': rlog.node(r),
165 b'p1': pnode,
165 b'p1': pnode,
166 b'p2': rlog.nullid,
166 b'p2': rlog.nullid,
167 b'cs': rlog.node(rlog.linkrev(r)),
167 b'cs': rlog.node(rlog.linkrev(r)),
168 b'flags': rlog.flags(r),
168 b'flags': rlog.flags(r),
169 b'deltabase': rlog.node(deltaparent),
169 b'deltabase': rlog.node(deltaparent),
170 b'delta': rlog.revdiff(deltaparent, r),
170 b'delta': rlog.revdiff(deltaparent, r),
171 b'sidedata': rlog.sidedata(r),
171 b'sidedata': rlog.sidedata(r),
172 }
172 }
173
173
174 def deltaiter(self):
174 def deltaiter(self):
175 chain = None
175 chain = None
176 for chunkdata in iter(lambda: self.deltachunk(chain), {}):
176 for chunkdata in iter(lambda: self.deltachunk(chain), {}):
177 node = chunkdata[b'node']
177 node = chunkdata[b'node']
178 p1 = chunkdata[b'p1']
178 p1 = chunkdata[b'p1']
179 p2 = chunkdata[b'p2']
179 p2 = chunkdata[b'p2']
180 cs = chunkdata[b'cs']
180 cs = chunkdata[b'cs']
181 deltabase = chunkdata[b'deltabase']
181 deltabase = chunkdata[b'deltabase']
182 delta = chunkdata[b'delta']
182 delta = chunkdata[b'delta']
183 flags = chunkdata[b'flags']
183 flags = chunkdata[b'flags']
184 sidedata = chunkdata[b'sidedata']
184 sidedata = chunkdata[b'sidedata']
185
185
186 chain = node
186 chain = node
187
187
188 yield (node, p1, p2, cs, deltabase, delta, flags, sidedata)
188 yield (node, p1, p2, cs, deltabase, delta, flags, sidedata)
189
189
190 def linkmap(lnode):
190 def linkmap(lnode):
191 return rlog.rev(lnode)
191 return rlog.rev(lnode)
192
192
193 dlog = newrevlog(destname, recreate=True)
193 dlog = newrevlog(destname, recreate=True)
194 dummydeltas = dummychangegroup().deltaiter()
194 dummydeltas = dummychangegroup().deltaiter()
195 dlog.addgroup(dummydeltas, linkmap, tr)
195 dlog.addgroup(dummydeltas, linkmap, tr)
196 return dlog
196 return dlog
197
197
198
198
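A side note on the "iter(lambda: self.deltachunk(chain), {})" line above:
this is the two-argument form of iter(), which keeps calling the lambda until
it returns the sentinel {} (the empty dict deltachunk produces once the walk
runs past the last revision). A tiny standalone illustration of the same
idiom:

    nextvalue = iter(range(5)).__next__
    values = list(iter(nextvalue, 3))   # call nextvalue() until it returns 3
    assert values == [0, 1, 2]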
199 def lowlevelcopy(rlog, tr, destname=b'_destrevlog'):
199 def lowlevelcopy(rlog, tr, destname=b'_destrevlog'):
200 """Like addgroupcopy, but use the low level revlog._addrevision directly.
200 """Like addgroupcopy, but use the low level revlog._addrevision directly.
201
201
202 It exercises some code paths that are hard to reach otherwise.
202 It exercises some code paths that are hard to reach otherwise.
203 """
203 """
204 dlog = newrevlog(destname, recreate=True)
204 dlog = newrevlog(destname, recreate=True)
205 for r in rlog:
205 for r in rlog:
206 p1 = rlog.node(r - 1)
206 p1 = rlog.node(r - 1)
207 p2 = rlog.nullid
207 p2 = rlog.nullid
208 if r == 0 or (rlog.flags(r) & revlog.REVIDX_EXTSTORED):
208 if r == 0 or (rlog.flags(r) & revlog.REVIDX_EXTSTORED):
209 text = rlog.rawdata(r)
209 text = rlog.rawdata(r)
210 cachedelta = None
210 cachedelta = None
211 else:
211 else:
212 # deltaparent cannot have EXTSTORED flag.
212 # deltaparent cannot have EXTSTORED flag.
213 deltaparent = max(
213 deltaparent = max(
214 [-1]
214 [-1]
215 + [
215 + [
216 p
216 p
217 for p in range(r)
217 for p in range(r)
218 if rlog.flags(p) & revlog.REVIDX_EXTSTORED == 0
218 if rlog.flags(p) & revlog.REVIDX_EXTSTORED == 0
219 ]
219 ]
220 )
220 )
221 text = None
221 text = None
222 cachedelta = (deltaparent, rlog.revdiff(deltaparent, r))
222 cachedelta = (deltaparent, rlog.revdiff(deltaparent, r))
223 flags = rlog.flags(r)
223 flags = rlog.flags(r)
224 with dlog._writing(_NoTransaction()):
224 with dlog._writing(_NoTransaction()):
225 dlog._addrevision(
225 dlog._addrevision(
226 rlog.node(r),
226 rlog.node(r),
227 text,
227 text,
228 tr,
228 tr,
229 r,
229 r,
230 p1,
230 p1,
231 p2,
231 p2,
232 flags,
232 flags,
233 cachedelta,
233 cachedelta,
234 )
234 )
235 return dlog
235 return dlog
236
236
237
237
238 # Utilities to generate revisions for testing
238 # Utilities to generate revisions for testing
239
239
240
240
241 def genbits(n):
241 def genbits(n):
242 """Given a number n, generate (2 ** (n * 2) + 1) numbers in range(2 ** n).
242 """Given a number n, generate (2 ** (n * 2) + 1) numbers in range(2 ** n).
243 i.e. the generated numbers have a width of n bits.
243 i.e. the generated numbers have a width of n bits.
244
244
245 The combination of two adjacent numbers will cover all possible cases.
245 The combination of two adjacent numbers will cover all possible cases.
246 That is to say, given any x, y where both x and y are in range(2 ** n),
246 That is to say, given any x, y where both x and y are in range(2 ** n),
247 there is an x followed immediately by y in the generated sequence.
247 there is an x followed immediately by y in the generated sequence.
248 """
248 """
249 m = 2 ** n
249 m = 2 ** n
250
250
251 # Gray Code. See https://en.wikipedia.org/wiki/Gray_code
251 # Gray Code. See https://en.wikipedia.org/wiki/Gray_code
252 gray = lambda x: x ^ (x >> 1)
252 gray = lambda x: x ^ (x >> 1)
253 reversegray = {gray(i): i for i in range(m)}
253 reversegray = {gray(i): i for i in range(m)}
254
254
255 # Generate (n * 2) bit gray code, yield lower n bits as X, and look for
255 # Generate (n * 2) bit gray code, yield lower n bits as X, and look for
256 # the next unused gray code where higher n bits equal to X.
256 # the next unused gray code where higher n bits equal to X.
257
257
258 # For gray codes whose higher bits are X, a[X] of them have been used.
258 # For gray codes whose higher bits are X, a[X] of them have been used.
259 a = [0] * m
259 a = [0] * m
260
260
261 # Iterate from 0.
261 # Iterate from 0.
262 x = 0
262 x = 0
263 yield x
263 yield x
264 for i in range(m * m):
264 for i in range(m * m):
265 x = reversegray[x]
265 x = reversegray[x]
266 y = gray(a[x] + x * m) & (m - 1)
266 y = gray(a[x] + x * m) & (m - 1)
267 assert a[x] < m
267 assert a[x] < m
268 a[x] += 1
268 a[x] += 1
269 x = y
269 x = y
270 yield x
270 yield x
271
271
272
272
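The coverage property genbits promises is cheap to verify: the sequence has
2 ** (n * 2) + 1 elements, and if every adjacent pair is distinct then all
2 ** (n * 2) ordered (x, y) transitions occur. A short self-check sketch:

    n = 3
    seq = list(genbits(n))
    assert len(seq) == 2 ** (n * 2) + 1
    pairs = {(seq[i], seq[i + 1]) for i in range(len(seq) - 1)}
    assert len(pairs) == 2 ** (n * 2)   # all 64 adjacent combinations for n=3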
273 def gentext(rev):
273 def gentext(rev):
274 '''Given a revision number, generate dummy text'''
274 '''Given a revision number, generate dummy text'''
275 return b''.join(b'%d\n' % j for j in range(-1, rev % 5))
275 return b''.join(b'%d\n' % j for j in range(-1, rev % 5))
276
276
277
277
278 def writecases(rlog, tr):
278 def writecases(rlog, tr):
279 """Write some revisions interested to the test.
279 """Write some revisions interested to the test.
280
280
281 The test is interested in 3 properties of a revision:
281 The test is interested in 3 properties of a revision:
282
282
283 - Is it a delta or a full text? (isdelta)
283 - Is it a delta or a full text? (isdelta)
284 This is to catch some delta application issues.
284 This is to catch some delta application issues.
285 - Does it have a flag of EXTSTORED? (isext)
285 - Does it have a flag of EXTSTORED? (isext)
286 This is to catch some flag processor issues, especially when
286 This is to catch some flag processor issues, especially when
287 they interact with revlog deltas.
287 they interact with revlog deltas.
288 - Is its text empty? (isempty)
288 - Is its text empty? (isempty)
289 This is less important. It is intended to catch some careless
289 This is less important. It is intended to catch some careless
290 checks like "if text" instead of "if text is None". Note: if a flag
290 checks like "if text" instead of "if text is None". Note: if a flag
291 processor is involved, the raw text may not be empty.
291 processor is involved, the raw text may not be empty.
292
292
293 Write 65 revisions, so that all combinations of the above flags for
293 Write 65 revisions, so that all combinations of the above flags for
294 adjacent revisions are covered. That is to say,
294 adjacent revisions are covered. That is to say,
295
295
296 len(set(
296 len(set(
297 (r.delta, r.ext, r.empty, (r+1).delta, (r+1).ext, (r+1).empty)
297 (r.delta, r.ext, r.empty, (r+1).delta, (r+1).ext, (r+1).empty)
298 for r in range(len(rlog) - 1)
298 for r in range(len(rlog) - 1)
299 )) == 64.
299 )) == 64.
300
300
301 Where "r.delta", "r.ext", and "r.empty" are booleans matching properties
301 Where "r.delta", "r.ext", and "r.empty" are booleans matching properties
302 mentioned above.
302 mentioned above.
303
303
304 Return expected [(text, rawtext)].
304 Return expected [(text, rawtext)].
305 """
305 """
306 result = []
306 result = []
307 for i, x in enumerate(genbits(3)):
307 for i, x in enumerate(genbits(3)):
308 isdelta, isext, isempty = bool(x & 1), bool(x & 2), bool(x & 4)
308 isdelta, isext, isempty = bool(x & 1), bool(x & 2), bool(x & 4)
309 if isempty:
309 if isempty:
310 text = b''
310 text = b''
311 else:
311 else:
312 text = gentext(i)
312 text = gentext(i)
313 rev = appendrev(rlog, text, tr, isext=isext, isdelta=isdelta)
313 rev = appendrev(rlog, text, tr, isext=isext, isdelta=isdelta)
314
314
315 # Verify text, rawtext, and rawsize
315 # Verify text, rawtext, and rawsize
316 if isext:
316 if isext:
317 rawtext = writeprocessor(None, text)[0]
317 rawtext = writeprocessor(None, text)[0]
318 else:
318 else:
319 rawtext = text
319 rawtext = text
320 if rlog.rawsize(rev) != len(rawtext):
320 if rlog.rawsize(rev) != len(rawtext):
321 abort('rev %d: wrong rawsize' % rev)
321 abort('rev %d: wrong rawsize' % rev)
322 if rlog.revision(rev) != text:
322 if rlog.revision(rev) != text:
323 abort('rev %d: wrong text' % rev)
323 abort('rev %d: wrong text' % rev)
324 if rlog.rawdata(rev) != rawtext:
324 if rlog.rawdata(rev) != rawtext:
325 abort('rev %d: wrong rawtext' % rev)
325 abort('rev %d: wrong rawtext' % rev)
326 result.append((text, rawtext))
326 result.append((text, rawtext))
327
327
328 # Verify flags like isdelta, isext work as expected
328 # Verify flags like isdelta, isext work as expected
329 # isdelta can be overridden to False if this or p1 has isext set
329 # isdelta can be overridden to False if this or p1 has isext set
330 if bool(rlog.deltaparent(rev) > -1) and not isdelta:
330 if bool(rlog.deltaparent(rev) > -1) and not isdelta:
331 abort('rev %d: isdelta is unexpected' % rev)
331 abort('rev %d: isdelta is unexpected' % rev)
332 if bool(rlog.flags(rev)) != isext:
332 if bool(rlog.flags(rev)) != isext:
333 abort('rev %d: isext is ineffective' % rev)
333 abort('rev %d: isext is ineffective' % rev)
334 return result
334 return result
335
335
336
336
337 # Main test and checking
337 # Main test and checking
338
338
339
339
340 def checkrevlog(rlog, expected):
340 def checkrevlog(rlog, expected):
341 '''Check if revlog has expected contents. expected is [(text, rawtext)]'''
341 '''Check if revlog has expected contents. expected is [(text, rawtext)]'''
342 # Test using different access orders. This could expose some issues
342 # Test using different access orders. This could expose some issues
343 # depending on revlog caching (see revlog._cache).
343 # depending on revlog caching (see revlog._cache).
344 for r0 in range(len(rlog) - 1):
344 for r0 in range(len(rlog) - 1):
345 r1 = r0 + 1
345 r1 = r0 + 1
346 for revorder in [[r0, r1], [r1, r0]]:
346 for revorder in [[r0, r1], [r1, r0]]:
347 for raworder in [[True], [False], [True, False], [False, True]]:
347 for raworder in [[True], [False], [True, False], [False, True]]:
348 nlog = newrevlog()
348 nlog = newrevlog()
349 for rev in revorder:
349 for rev in revorder:
350 for raw in raworder:
350 for raw in raworder:
351 if raw:
351 if raw:
352 t = nlog.rawdata(rev)
352 t = nlog.rawdata(rev)
353 else:
353 else:
354 t = nlog.revision(rev)
354 t = nlog.revision(rev)
355 if t != expected[rev][int(raw)]:
355 if t != expected[rev][int(raw)]:
356 abort(
356 abort(
357 'rev %d: corrupted %stext'
357 'rev %d: corrupted %stext'
358 % (rev, raw and 'raw' or '')
358 % (rev, raw and 'raw' or '')
359 )
359 )
360
360
361
361
362 slicingdata = [
362 slicingdata = [
363 ([0, 1, 2, 3, 55, 56, 58, 59, 60], [[0, 1], [2], [58], [59, 60]], 10),
363 ([0, 1, 2, 3, 55, 56, 58, 59, 60], [[0, 1], [2], [58], [59, 60]], 10),
364 ([0, 1, 2, 3, 55, 56, 58, 59, 60], [[0, 1], [2], [58], [59, 60]], 10),
364 ([0, 1, 2, 3, 55, 56, 58, 59, 60], [[0, 1], [2], [58], [59, 60]], 10),
365 (
365 (
366 [-1, 0, 1, 2, 3, 55, 56, 58, 59, 60],
366 [-1, 0, 1, 2, 3, 55, 56, 58, 59, 60],
367 [[-1, 0, 1], [2], [58], [59, 60]],
367 [[-1, 0, 1], [2], [58], [59, 60]],
368 10,
368 10,
369 ),
369 ),
370 ]
370 ]
371
371
372
372
373 def slicingtest(rlog):
373 def slicingtest(rlog):
374 oldmin = rlog._srmingapsize
374 old_delta_config = rlog.delta_config
375 old_data_config = rlog.data_config
376 rlog.delta_config = rlog.delta_config.copy()
377 rlog.data_config = rlog.data_config.copy()
375 try:
378 try:
376 # the test revlog is small, so we remove the floor below which
379 # the test revlog is small, so we remove the floor below which
377 # slicing is disregarded.
380 # slicing is disregarded.
378 rlog.data_config.sr_min_gap_size = 0
381 rlog.data_config.sr_min_gap_size = 0
379 rlog.delta_config.sr_min_gap_size = 0
382 rlog.delta_config.sr_min_gap_size = 0
380 for item in slicingdata:
383 for item in slicingdata:
381 chain, expected, target = item
384 chain, expected, target = item
382 result = deltas.slicechunk(rlog, chain, targetsize=target)
385 result = deltas.slicechunk(rlog, chain, targetsize=target)
383 result = list(result)
386 result = list(result)
384 if result != expected:
387 if result != expected:
385 print('slicing differ:')
388 print('slicing differ:')
386 print(' chain: %s' % chain)
389 print(' chain: %s' % chain)
387 print(' target: %s' % target)
390 print(' target: %s' % target)
388 print(' expected: %s' % expected)
391 print(' expected: %s' % expected)
389 print(' result: %s' % result)
392 print(' result: %s' % result)
390 finally:
393 finally:
391 rlog.data_config.sr_min_gap_size = oldmin
394 rlog.delta_config = old_delta_config
392 rlog.delta_config.sr_min_gap_size = oldmin
395 rlog.data_config = old_data_config
393
396
394
397
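The slicingtest hunk above is the heart of this change: mutating
rlog.delta_config or rlog.data_config in place would leak the
sr_min_gap_size override to everything sharing those config objects, so the
test now installs private copies and restores the originals in the finally
block. The same save/copy/mutate/restore shape, reduced to a minimal hedged
sketch (DummyConfig is illustrative, not a real Mercurial class):

    import dataclasses

    @dataclasses.dataclass
    class DummyConfig:
        sr_min_gap_size: int = 100   # arbitrary placeholder value

        def copy(self):
            return dataclasses.replace(self)

    shared = DummyConfig()
    saved = shared                   # reference kept for restoration
    working = shared.copy()
    working.sr_min_gap_size = 0      # mutate only the private copy
    assert saved.sr_min_gap_size == 100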
395 def md5sum(s):
398 def md5sum(s):
396 return hashlib.md5(s).digest()
399 return hashlib.md5(s).digest()
397
400
398
401
399 def _maketext(*coord):
402 def _maketext(*coord):
400 """create piece of text according to range of integers
403 """create piece of text according to range of integers
401
404
402 The test returned use a md5sum of the integer to make it less
405 The test returned use a md5sum of the integer to make it less
403 compressible"""
406 compressible"""
404 pieces = []
407 pieces = []
405 for start, size in coord:
408 for start, size in coord:
406 num = range(start, start + size)
409 num = range(start, start + size)
407 p = [md5sum(b'%d' % r) for r in num]
410 p = [md5sum(b'%d' % r) for r in num]
408 pieces.append(b'\n'.join(p))
411 pieces.append(b'\n'.join(p))
409 return b'\n'.join(pieces) + b'\n'
412 return b'\n'.join(pieces) + b'\n'
410
413
411
414
412 data = [
415 data = [
413 _maketext((0, 120), (456, 60)),
416 _maketext((0, 120), (456, 60)),
414 _maketext((0, 120), (345, 60)),
417 _maketext((0, 120), (345, 60)),
415 _maketext((0, 120), (734, 60)),
418 _maketext((0, 120), (734, 60)),
416 _maketext((0, 120), (734, 60), (923, 45)),
419 _maketext((0, 120), (734, 60), (923, 45)),
417 _maketext((0, 120), (734, 60), (234, 45)),
420 _maketext((0, 120), (734, 60), (234, 45)),
418 _maketext((0, 120), (734, 60), (564, 45)),
421 _maketext((0, 120), (734, 60), (564, 45)),
419 _maketext((0, 120), (734, 60), (361, 45)),
422 _maketext((0, 120), (734, 60), (361, 45)),
420 _maketext((0, 120), (734, 60), (489, 45)),
423 _maketext((0, 120), (734, 60), (489, 45)),
421 _maketext((0, 120), (123, 60)),
424 _maketext((0, 120), (123, 60)),
422 _maketext((0, 120), (145, 60)),
425 _maketext((0, 120), (145, 60)),
423 _maketext((0, 120), (104, 60)),
426 _maketext((0, 120), (104, 60)),
424 _maketext((0, 120), (430, 60)),
427 _maketext((0, 120), (430, 60)),
425 _maketext((0, 120), (430, 60), (923, 45)),
428 _maketext((0, 120), (430, 60), (923, 45)),
426 _maketext((0, 120), (430, 60), (234, 45)),
429 _maketext((0, 120), (430, 60), (234, 45)),
427 _maketext((0, 120), (430, 60), (564, 45)),
430 _maketext((0, 120), (430, 60), (564, 45)),
428 _maketext((0, 120), (430, 60), (361, 45)),
431 _maketext((0, 120), (430, 60), (361, 45)),
429 _maketext((0, 120), (430, 60), (489, 45)),
432 _maketext((0, 120), (430, 60), (489, 45)),
430 _maketext((0, 120), (249, 60)),
433 _maketext((0, 120), (249, 60)),
431 _maketext((0, 120), (832, 60)),
434 _maketext((0, 120), (832, 60)),
432 _maketext((0, 120), (891, 60)),
435 _maketext((0, 120), (891, 60)),
433 _maketext((0, 120), (543, 60)),
436 _maketext((0, 120), (543, 60)),
434 _maketext((0, 120), (120, 60)),
437 _maketext((0, 120), (120, 60)),
435 _maketext((0, 120), (60, 60), (768, 30)),
438 _maketext((0, 120), (60, 60), (768, 30)),
436 _maketext((0, 120), (60, 60), (260, 30)),
439 _maketext((0, 120), (60, 60), (260, 30)),
437 _maketext((0, 120), (60, 60), (450, 30)),
440 _maketext((0, 120), (60, 60), (450, 30)),
438 _maketext((0, 120), (60, 60), (361, 30)),
441 _maketext((0, 120), (60, 60), (361, 30)),
439 _maketext((0, 120), (60, 60), (886, 30)),
442 _maketext((0, 120), (60, 60), (886, 30)),
440 _maketext((0, 120), (60, 60), (116, 30)),
443 _maketext((0, 120), (60, 60), (116, 30)),
441 _maketext((0, 120), (60, 60), (567, 30), (629, 40)),
444 _maketext((0, 120), (60, 60), (567, 30), (629, 40)),
442 _maketext((0, 120), (60, 60), (569, 30), (745, 40)),
445 _maketext((0, 120), (60, 60), (569, 30), (745, 40)),
443 _maketext((0, 120), (60, 60), (777, 30), (700, 40)),
446 _maketext((0, 120), (60, 60), (777, 30), (700, 40)),
444 _maketext((0, 120), (60, 60), (618, 30), (398, 40), (158, 10)),
447 _maketext((0, 120), (60, 60), (618, 30), (398, 40), (158, 10)),
445 ]
448 ]
446
449
447
450
448 def makesnapshot(tr):
451 def makesnapshot(tr):
449 rl = newrevlog(name=b'_snaprevlog3', recreate=True)
452 rl = newrevlog(name=b'_snaprevlog3', recreate=True)
450 for i in data:
453 for i in data:
451 appendrev(rl, i, tr)
454 appendrev(rl, i, tr)
452 return rl
455 return rl
453
456
454
457
455 snapshots = [-1, 0, 6, 8, 11, 17, 19, 21, 25, 30]
458 snapshots = [-1, 0, 6, 8, 11, 17, 19, 21, 25, 30]
456
459
457
460
458 def issnapshottest(rlog):
461 def issnapshottest(rlog):
459 result = []
462 result = []
460 if rlog.issnapshot(-1):
463 if rlog.issnapshot(-1):
461 result.append(-1)
464 result.append(-1)
462 for rev in rlog:
465 for rev in rlog:
463 if rlog.issnapshot(rev):
466 if rlog.issnapshot(rev):
464 result.append(rev)
467 result.append(rev)
465 if snapshots != result:
468 if snapshots != result:
466 print('snapshot differ:')
469 print('snapshot differ:')
467 print(' expected: %s' % snapshots)
470 print(' expected: %s' % snapshots)
468 print(' got: %s' % result)
471 print(' got: %s' % result)
469
472
470
473
471 snapshotmapall = {0: {6, 8, 11, 17, 19, 25}, 8: {21}, -1: {0, 30}}
474 snapshotmapall = {0: {6, 8, 11, 17, 19, 25}, 8: {21}, -1: {0, 30}}
472 snapshotmap15 = {0: {17, 19, 25}, 8: {21}, -1: {30}}
475 snapshotmap15 = {0: {17, 19, 25}, 8: {21}, -1: {30}}
473
476
474
477
475 def findsnapshottest(rlog):
478 def findsnapshottest(rlog):
476 cache = deltas.SnapshotCache()
479 cache = deltas.SnapshotCache()
477 cache.update(rlog)
480 cache.update(rlog)
478 resultall = dict(cache.snapshots)
481 resultall = dict(cache.snapshots)
479 if resultall != snapshotmapall:
482 if resultall != snapshotmapall:
480 print('snapshot map differ:')
483 print('snapshot map differ:')
481 print(' expected: %s' % snapshotmapall)
484 print(' expected: %s' % snapshotmapall)
482 print(' got: %s' % resultall)
485 print(' got: %s' % resultall)
483 cache15 = deltas.SnapshotCache()
486 cache15 = deltas.SnapshotCache()
484 cache15.update(rlog, 15)
487 cache15.update(rlog, 15)
485 result15 = dict(cache15.snapshots)
488 result15 = dict(cache15.snapshots)
486 if result15 != snapshotmap15:
489 if result15 != snapshotmap15:
487 print('snapshot map differ:')
490 print('snapshot map differ:')
488 print(' expected: %s' % snapshotmap15)
491 print(' expected: %s' % snapshotmap15)
489 print(' got: %s' % result15)
492 print(' got: %s' % result15)
490
493
491
494
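For readers of the expected maps above: SnapshotCache.snapshots maps a base
revision to the set of snapshot revisions deltaing against it, with -1
(nullrev) keying the full-text snapshots. A hedged way to rebuild the same
shape from the public API, assuming "rlog" is the revlog from makesnapshot:

    from collections import defaultdict
    rebuilt = defaultdict(set)
    for rev in rlog:
        if rlog.issnapshot(rev):
            rebuilt[rlog.deltaparent(rev)].add(rev)
    # expected to match snapshotmapall for the data above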
492 def maintest():
495 def maintest():
493 with newtransaction() as tr:
496 with newtransaction() as tr:
494 rl = newrevlog(recreate=True)
497 rl = newrevlog(recreate=True)
495 expected = writecases(rl, tr)
498 expected = writecases(rl, tr)
496 checkrevlog(rl, expected)
499 checkrevlog(rl, expected)
497 print('local test passed')
500 print('local test passed')
498 # Copy via revlog.addgroup
501 # Copy via revlog.addgroup
499 rl1 = addgroupcopy(rl, tr)
502 rl1 = addgroupcopy(rl, tr)
500 checkrevlog(rl1, expected)
503 checkrevlog(rl1, expected)
501 rl2 = addgroupcopy(rl, tr, optimaldelta=False)
504 rl2 = addgroupcopy(rl, tr, optimaldelta=False)
502 checkrevlog(rl2, expected)
505 checkrevlog(rl2, expected)
503 print('addgroupcopy test passed')
506 print('addgroupcopy test passed')
504 # Copy via revlog.clone
507 # Copy via revlog.clone
505 rl3 = newrevlog(name=b'_destrevlog3', recreate=True)
508 rl3 = newrevlog(name=b'_destrevlog3', recreate=True)
506 rl.clone(tr, rl3)
509 rl.clone(tr, rl3)
507 checkrevlog(rl3, expected)
510 checkrevlog(rl3, expected)
508 print('clone test passed')
511 print('clone test passed')
509 # Copy via low-level revlog._addrevision
512 # Copy via low-level revlog._addrevision
510 rl4 = lowlevelcopy(rl, tr)
513 rl4 = lowlevelcopy(rl, tr)
511 checkrevlog(rl4, expected)
514 checkrevlog(rl4, expected)
512 print('lowlevelcopy test passed')
515 print('lowlevelcopy test passed')
513 slicingtest(rl)
516 slicingtest(rl)
514 print('slicing test passed')
517 print('slicing test passed')
515 rl5 = makesnapshot(tr)
518 rl5 = makesnapshot(tr)
516 issnapshottest(rl5)
519 issnapshottest(rl5)
517 print('issnapshot test passed')
520 print('issnapshot test passed')
518 findsnapshottest(rl5)
521 findsnapshottest(rl5)
519 print('findsnapshot test passed')
522 print('findsnapshot test passed')
520
523
521
524
522 try:
525 try:
523 maintest()
526 maintest()
524 except Exception as ex:
527 except Exception as ex:
525 abort('crashed: %s' % ex)
528 abort('crashed: %s' % ex)