revlog: fix misleading comment about _maxinline
Arseniy Alekseyev
r50723:9854a9ad default
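Context for the change: `_maxinline` does not cap the total size of a revlog, which is what the old comment suggested. It caps how much revision data may stay embedded inline in the `.i` index file; once the inline data outgrows it, Mercurial is expected to split the data out into a separate `.d` file (in the revlog code that enforcement lives around `_enforceinlinesize`). A minimal sketch of that policy follows; the `should_split_inline` helper is hypothetical and exists only for this note, not in revlog.py:

# Sketch only, not part of this commit: restates what the corrected
# comment says about _maxinline.
_maxinline = 131072  # 128 KiB of revision data may live inline in the .i file


def should_split_inline(inline_data_size):
    # hypothetical helper: once the inline data exceeds _maxinline,
    # the revlog should be rewritten with a separate .d data file
    return inline_data_size > _maxinline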
@@ -1,3385 +1,3385 @@
# revlog.py - storage back-end for mercurial
# coding: utf8
#
# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""


import binascii
import collections
import contextlib
import io
import os
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullrev,
    sha1nodeconstants,
    short,
    wdirrev,
)
from .i18n import _
from .pycompat import getattr
from .revlogutils.constants import (
    ALL_KINDS,
    CHANGELOGV2,
    COMP_MODE_DEFAULT,
    COMP_MODE_INLINE,
    COMP_MODE_PLAIN,
    DELTA_BASE_REUSE_NO,
    DELTA_BASE_REUSE_TRY,
    ENTRY_RANK,
    FEATURES_BY_VERSION,
    FLAG_GENERALDELTA,
    FLAG_INLINE_DATA,
    INDEX_HEADER,
    KIND_CHANGELOG,
    KIND_FILELOG,
    RANK_UNKNOWN,
    REVLOGV0,
    REVLOGV1,
    REVLOGV1_FLAGS,
    REVLOGV2,
    REVLOGV2_FLAGS,
    REVLOG_DEFAULT_FLAGS,
    REVLOG_DEFAULT_FORMAT,
    REVLOG_DEFAULT_VERSION,
    SUPPORTED_FLAGS,
)
from .revlogutils.flagutil import (
    REVIDX_DEFAULT_FLAGS,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_FLAGS_ORDER,
    REVIDX_HASCOPIESINFO,
    REVIDX_ISCENSORED,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
)
from .thirdparty import attr
from . import (
    ancestor,
    dagop,
    error,
    mdiff,
    policy,
    pycompat,
    revlogutils,
    templatefilters,
    util,
)
from .interfaces import (
    repository,
    util as interfaceutil,
)
from .revlogutils import (
    deltas as deltautil,
    docket as docketutil,
    flagutil,
    nodemap as nodemaputil,
    randomaccessfile,
    revlogv0,
    rewrite,
    sidedata as sidedatautil,
)
from .utils import (
    storageutil,
    stringutil,
)

# blanked usage of all the names to prevent pyflakes constraints
# We need these names available in the module for extensions.

REVLOGV0
REVLOGV1
REVLOGV2
CHANGELOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_HASCOPIESINFO
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_RAWTEXT_CHANGING_FLAGS

parsers = policy.importmod('parsers')
rustancestor = policy.importrust('ancestor')
rustdagop = policy.importrust('dagop')
rustrevlog = policy.importrust('revlog')

# Aliased for performance.
_zlibdecompress = zlib.decompress

-# max size of revlog with inline data
+# max size of inline data embedded into a revlog
_maxinline = 131072

# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    return text, False


def ellipsiswriteprocessor(rl, text):
    return text, False


def ellipsisrawprocessor(rl, text):
    return False


ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)


def _verify_revision(rl, skipflags, state, node):
    """Verify the integrity of the given revlog ``node`` while providing a hook
    point for extensions to influence the operation."""
    if skipflags:
        state[b'skipread'].add(node)
    else:
        # Side-effect: read content and verify hash.
        rl.revision(node)


# True if a fast implementation for persistent-nodemap is available
#
# We also consider we have a "fast" implementation in "pure" python because
# people using pure don't really have performance considerations (and a
# wheelbarrow of other slowness sources)
HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
    parsers, 'BaseIndexObject'
)


@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta:
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    sidedata = attr.ib()
    protocol_flags = attr.ib()
    linknode = attr.ib(default=None)


@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem:
    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)


def parse_index_v1(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline)
    return index, cache


def parse_index_v2(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline, format=REVLOGV2)
    return index, cache


def parse_index_cl_v2(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline, format=CHANGELOGV2)
    return index, cache


if util.safehasattr(parsers, 'parse_index_devel_nodemap'):

    def parse_index_v1_nodemap(data, inline):
        index, cache = parsers.parse_index_devel_nodemap(data, inline)
        return index, cache


else:
    parse_index_v1_nodemap = None


def parse_index_v1_mixed(data, inline):
    index, cache = parse_index_v1(data, inline)
    return rustrevlog.MixedIndex(index), cache


# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7FFFFFFF

FILE_TOO_SHORT_MSG = _(
    b'cannot read from revlog %s;'
    b' expected %d bytes from offset %d, data size is %d'
)

hexdigits = b'0123456789abcdefABCDEF'


class revlog:
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.

    If `upperboundcomp` is not None, this is the expected maximal gain from
    compression for the data content.

    `concurrencychecker` is an optional function that receives 3 arguments: a
    file handle, a filename, and an expected position. It should check whether
    the current position in the file handle is valid, and log/warn/fail (by
    raising).

    See mercurial/revlogutils/constants.py for details about the content of an
    index entry.
    """

    _flagserrorclass = error.RevlogError

    def __init__(
        self,
        opener,
        target,
        radix,
        postfix=None,  # only exists for `tmpcensored` now
        checkambig=False,
        mmaplargeindex=False,
        censorable=False,
        upperboundcomp=None,
        persistentnodemap=False,
        concurrencychecker=None,
        trypending=False,
        canonical_parent_order=True,
    ):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.

        `target`: a (KIND, ID) tuple that identifies the content stored in
        this revlog. It helps the rest of the code to understand what the
        revlog is about without having to resort to heuristics and index
        filename analysis. Note: this must be reliably set by normal code,
        but test, debug, or performance measurement code might not set it
        to an accurate value.
        """
        self.upperboundcomp = upperboundcomp

        self.radix = radix

        self._docket_file = None
        self._indexfile = None
        self._datafile = None
        self._sidedatafile = None
        self._nodemap_file = None
        self.postfix = postfix
        self._trypending = trypending
        self.opener = opener
        if persistentnodemap:
            self._nodemap_file = nodemaputil.get_nodemap_file(self)

        assert target[0] in ALL_KINDS
        assert len(target) == 2
        self.target = target
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        self._mmaplargeindex = mmaplargeindex
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._revisioncache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, b'')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._deltabothparents = True
        self._candidate_group_chunk_size = 0
        self._debug_delta = False
        self.index = None
        self._docket = None
        self._nodemap_docket = None
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._compengine = b'zlib'
        self._compengineopts = {}
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._sparserevlog = False
        self.hassidedata = False
        self._srdensitythreshold = 0.50
        self._srmingapsize = 262144

        # other optional features

        # might remove rank configuration once the computation has no impact
        self._compute_rank = False

        # Make copy of flag processors so each revlog instance can support
        # custom flags.
        self._flagprocessors = dict(flagutil.flagprocessors)

        # 3-tuple of file handles being used for active writing.
        self._writinghandles = None
        # prevent nesting of addgroup
        self._adding_group = None

        self._loadindex()

        self._concurrencychecker = concurrencychecker

        # parent order is supposed to be semantically irrelevant, so we
        # normally resort parents to ensure that the first parent is non-null,
        # if there is a non-null parent at all.
        # filelog abuses the parent order as flag to mark some instances of
        # meta-encoded files, so allow it to disable this behavior.
        self.canonical_parent_order = canonical_parent_order

    def _init_opts(self):
        """process options (from above/config) to set up associated default revlog mode

        These values might be affected when actually reading on-disk information.

        The relevant values are returned for use in _loadindex().

        * newversionflags:
            version header to use if we need to create a new revlog

        * mmapindexthreshold:
            minimal index size at which to start using mmap

        * force_nodemap:
            force the usage of a "development" version of the nodemap code
        """
        mmapindexthreshold = None
        opts = self.opener.options

        if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
            new_header = CHANGELOGV2
            self._compute_rank = opts.get(b'changelogv2.compute-rank', True)
        elif b'revlogv2' in opts:
            new_header = REVLOGV2
        elif b'revlogv1' in opts:
            new_header = REVLOGV1 | FLAG_INLINE_DATA
            if b'generaldelta' in opts:
                new_header |= FLAG_GENERALDELTA
        elif b'revlogv0' in self.opener.options:
            new_header = REVLOGV0
        else:
            new_header = REVLOG_DEFAULT_VERSION

        if b'chunkcachesize' in opts:
            self._chunkcachesize = opts[b'chunkcachesize']
        if b'maxchainlen' in opts:
            self._maxchainlen = opts[b'maxchainlen']
        if b'deltabothparents' in opts:
            self._deltabothparents = opts[b'deltabothparents']
        dps_cgds = opts.get(b'delta-parent-search.candidate-group-chunk-size')
        if dps_cgds:
            self._candidate_group_chunk_size = dps_cgds
        self._lazydelta = bool(opts.get(b'lazydelta', True))
        self._lazydeltabase = False
        if self._lazydelta:
            self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
        if b'debug-delta' in opts:
            self._debug_delta = opts[b'debug-delta']
        if b'compengine' in opts:
            self._compengine = opts[b'compengine']
        if b'zlib.level' in opts:
            self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
        if b'zstd.level' in opts:
            self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
        if b'maxdeltachainspan' in opts:
            self._maxdeltachainspan = opts[b'maxdeltachainspan']
        if self._mmaplargeindex and b'mmapindexthreshold' in opts:
            mmapindexthreshold = opts[b'mmapindexthreshold']
        self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
        withsparseread = bool(opts.get(b'with-sparse-read', False))
        # sparse-revlog forces sparse-read
        self._withsparseread = self._sparserevlog or withsparseread
        if b'sparse-read-density-threshold' in opts:
            self._srdensitythreshold = opts[b'sparse-read-density-threshold']
        if b'sparse-read-min-gap-size' in opts:
            self._srmingapsize = opts[b'sparse-read-min-gap-size']
        if opts.get(b'enableellipsis'):
            self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

        # revlog v0 doesn't have flag processors
        for flag, processor in opts.get(b'flagprocessors', {}).items():
            flagutil.insertflagprocessor(flag, processor, self._flagprocessors)

        if self._chunkcachesize <= 0:
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not greater than 0')
                % self._chunkcachesize
            )
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not a power of 2')
                % self._chunkcachesize
            )
        force_nodemap = opts.get(b'devel-force-nodemap', False)
        return new_header, mmapindexthreshold, force_nodemap

    def _get_data(self, filepath, mmap_threshold, size=None):
        """return the file content with or without mmap

        If the file is missing return the empty string"""
        try:
            with self.opener(filepath) as fp:
                if mmap_threshold is not None:
                    file_size = self.opener.fstat(fp).st_size
                    if file_size >= mmap_threshold:
                        if size is not None:
                            # avoid potential mmap crash
                            size = min(file_size, size)
                        # TODO: should .close() to release resources without
                        # relying on Python GC
                        if size is None:
                            return util.buffer(util.mmapread(fp))
                        else:
                            return util.buffer(util.mmapread(fp, size))
                if size is None:
                    return fp.read()
                else:
                    return fp.read(size)
        except FileNotFoundError:
            return b''

    def _loadindex(self, docket=None):

        new_header, mmapindexthreshold, force_nodemap = self._init_opts()

        if self.postfix is not None:
            entry_point = b'%s.i.%s' % (self.radix, self.postfix)
        elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
            entry_point = b'%s.i.a' % self.radix
        else:
            entry_point = b'%s.i' % self.radix

        if docket is not None:
            self._docket = docket
            self._docket_file = entry_point
        else:
            self._initempty = True
            entry_data = self._get_data(entry_point, mmapindexthreshold)
            if len(entry_data) > 0:
                header = INDEX_HEADER.unpack(entry_data[:4])[0]
                self._initempty = False
            else:
                header = new_header

            self._format_flags = header & ~0xFFFF
            self._format_version = header & 0xFFFF

            supported_flags = SUPPORTED_FLAGS.get(self._format_version)
            if supported_flags is None:
                msg = _(b'unknown version (%d) in revlog %s')
                msg %= (self._format_version, self.display_id)
                raise error.RevlogError(msg)
            elif self._format_flags & ~supported_flags:
                msg = _(b'unknown flags (%#04x) in version %d revlog %s')
                display_flag = self._format_flags >> 16
                msg %= (display_flag, self._format_version, self.display_id)
                raise error.RevlogError(msg)

            features = FEATURES_BY_VERSION[self._format_version]
            self._inline = features[b'inline'](self._format_flags)
            self._generaldelta = features[b'generaldelta'](self._format_flags)
            self.hassidedata = features[b'sidedata']

            if not features[b'docket']:
                self._indexfile = entry_point
                index_data = entry_data
            else:
                self._docket_file = entry_point
                if self._initempty:
                    self._docket = docketutil.default_docket(self, header)
                else:
                    self._docket = docketutil.parse_docket(
                        self, entry_data, use_pending=self._trypending
                    )

        if self._docket is not None:
            self._indexfile = self._docket.index_filepath()
            index_data = b''
            index_size = self._docket.index_end
            if index_size > 0:
                index_data = self._get_data(
                    self._indexfile, mmapindexthreshold, size=index_size
                )
                if len(index_data) < index_size:
                    msg = _(b'too few index data for %s: got %d, expected %d')
                    msg %= (self.display_id, len(index_data), index_size)
                    raise error.RevlogError(msg)

            self._inline = False
            # generaldelta implied by version 2 revlogs.
            self._generaldelta = True
            # the logic for persistent nodemap will be dealt with within the
            # main docket, so disable it for now.
            self._nodemap_file = None

        if self._docket is not None:
            self._datafile = self._docket.data_filepath()
            self._sidedatafile = self._docket.sidedata_filepath()
        elif self.postfix is None:
            self._datafile = b'%s.d' % self.radix
        else:
            self._datafile = b'%s.d.%s' % (self.radix, self.postfix)

        self.nodeconstants = sha1nodeconstants
        self.nullid = self.nodeconstants.nullid

        # sparse-revlog can't be on without general-delta (issue6056)
        if not self._generaldelta:
            self._sparserevlog = False

        self._storedeltachains = True

        devel_nodemap = (
            self._nodemap_file
            and force_nodemap
            and parse_index_v1_nodemap is not None
        )

        use_rust_index = False
        if rustrevlog is not None:
            if self._nodemap_file is not None:
                use_rust_index = True
            else:
                use_rust_index = self.opener.options.get(b'rust.index')

        self._parse_index = parse_index_v1
        if self._format_version == REVLOGV0:
            self._parse_index = revlogv0.parse_index_v0
        elif self._format_version == REVLOGV2:
            self._parse_index = parse_index_v2
        elif self._format_version == CHANGELOGV2:
            self._parse_index = parse_index_cl_v2
        elif devel_nodemap:
            self._parse_index = parse_index_v1_nodemap
        elif use_rust_index:
            self._parse_index = parse_index_v1_mixed
        try:
            d = self._parse_index(index_data, self._inline)
            index, chunkcache = d
            use_nodemap = (
                not self._inline
                and self._nodemap_file is not None
                and util.safehasattr(index, 'update_nodemap_data')
            )
            if use_nodemap:
                nodemap_data = nodemaputil.persisted_data(self)
                if nodemap_data is not None:
                    docket = nodemap_data[0]
                    if (
                        len(d[0]) > docket.tip_rev
                        and d[0][docket.tip_rev][7] == docket.tip_node
                    ):
                        # no changelog tampering
                        self._nodemap_docket = docket
                        index.update_nodemap_data(*nodemap_data)
        except (ValueError, IndexError):
            raise error.RevlogError(
                _(b"index %s is corrupted") % self.display_id
            )
        self.index = index
        self._segmentfile = randomaccessfile.randomaccessfile(
            self.opener,
            (self._indexfile if self._inline else self._datafile),
            self._chunkcachesize,
            chunkcache,
        )
        self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
            self.opener,
            self._sidedatafile,
            self._chunkcachesize,
        )
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = util.lrucachedict(500)
        # revlog header -> revlog compressor
        self._decompressors = {}

    @util.propertycache
    def revlog_kind(self):
        return self.target[0]

    @util.propertycache
    def display_id(self):
        """The public facing "ID" of the revlog that we use in messages"""
        if self.revlog_kind == KIND_FILELOG:
            # Reference the file without the "data/" prefix, so it is familiar
            # to the user.
            return self.target[1]
        else:
            return self.radix

    def _get_decompressor(self, t):
        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor(self._compengineopts)
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(
                    _(b'unknown compression type %s') % binascii.hexlify(t)
                )
        return compressor

    @util.propertycache
    def _compressor(self):
        engine = util.compengines[self._compengine]
        return engine.revlogcompressor(self._compengineopts)

    @util.propertycache
    def _decompressor(self):
        """the default decompressor"""
        if self._docket is None:
            return None
        t = self._docket.default_compression_header
        c = self._get_decompressor(t)
        return c.decompress

    def _indexfp(self):
        """file object for the revlog's index file"""
        return self.opener(self._indexfile, mode=b"r")

    def __index_write_fp(self):
        # You should not use this directly; use `_writing` instead
        try:
            f = self.opener(
                self._indexfile, mode=b"r+", checkambig=self._checkambig
            )
            if self._docket is None:
                f.seek(0, os.SEEK_END)
            else:
                f.seek(self._docket.index_end, os.SEEK_SET)
            return f
        except FileNotFoundError:
            return self.opener(
                self._indexfile, mode=b"w+", checkambig=self._checkambig
            )

    def __index_new_fp(self):
        # You should not use this unless you are upgrading from inline revlog
        return self.opener(
            self._indexfile,
            mode=b"w",
            checkambig=self._checkambig,
            atomictemp=True,
        )

    def _datafp(self, mode=b'r'):
        """file object for the revlog's data file"""
        return self.opener(self._datafile, mode=mode)

    @contextlib.contextmanager
    def _sidedatareadfp(self):
        """file object suitable to read sidedata"""
        if self._writinghandles:
            yield self._writinghandles[2]
        else:
            with self.opener(self._sidedatafile) as fp:
                yield fp

    def tiprev(self):
        return len(self.index) - 1

    def tip(self):
        return self.node(self.tiprev())

    def __contains__(self, rev):
        return 0 <= rev < len(self)

    def __len__(self):
        return len(self.index)

    def __iter__(self):
        return iter(range(len(self)))

    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        return storageutil.iterrevs(len(self), start=start, stop=stop)

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such a flag processor can alter
        # the rawtext content that the delta will be based on, and two clients
        # could have the same revlog node with different flags (i.e. different
        # rawtext contents) and the delta could be incompatible.
        if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
            self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
        ):
            return False
        return True

    def update_caches(self, transaction):
        if self._nodemap_file is not None:
            if transaction is None:
                nodemaputil.update_persistent_nodemap(self)
            else:
                nodemaputil.setup_persistent_nodemap(transaction, self)

    def clearcaches(self):
        self._revisioncache = None
        self._chainbasecache.clear()
        self._segmentfile.clear_cache()
        self._segmentfile_sidedata.clear_cache()
        self._pcache = {}
        self._nodemap_docket = None
        self.index.clearcaches()
        # The python code is the one responsible for validating the docket, so
        # we end up having to refresh it here.
        use_nodemap = (
            not self._inline
            and self._nodemap_file is not None
            and util.safehasattr(self.index, 'update_nodemap_data')
        )
        if use_nodemap:
            nodemap_data = nodemaputil.persisted_data(self)
            if nodemap_data is not None:
                self._nodemap_docket = nodemap_data[0]
                self.index.update_nodemap_data(*nodemap_data)

    def rev(self, node):
        try:
            return self.index.rev(node)
        except TypeError:
            raise
        except error.RevlogError:
            # parsers.c radix tree lookup failed
            if (
                node == self.nodeconstants.wdirid
                or node in self.nodeconstants.wdirfilenodeids
            ):
                raise error.WdirUnsupported
            raise error.LookupError(node, self.display_id, _(b'no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def sidedata_cut_off(self, rev):
        sd_cut_off = self.index[rev][8]
        if sd_cut_off != 0:
            return sd_cut_off
        # This is some annoying dance, because entries without sidedata
        # currently use 0 as their offset. (instead of previous-offset +
        # previous-size)
        #
        # We should reconsider this sidedata → 0 sidedata_offset policy.
        # In the meantime, we need this.
        while 0 <= rev:
            e = self.index[rev]
            if e[9] != 0:
                return e[8] + e[9]
            rev -= 1
        return 0

850 def flags(self, rev):
850 def flags(self, rev):
851 return self.index[rev][0] & 0xFFFF
851 return self.index[rev][0] & 0xFFFF
852
852
853 def length(self, rev):
853 def length(self, rev):
854 return self.index[rev][1]
854 return self.index[rev][1]
855
855
856 def sidedata_length(self, rev):
856 def sidedata_length(self, rev):
857 if not self.hassidedata:
857 if not self.hassidedata:
858 return 0
858 return 0
859 return self.index[rev][9]
859 return self.index[rev][9]
860
860
861 def rawsize(self, rev):
861 def rawsize(self, rev):
862 """return the length of the uncompressed text for a given revision"""
862 """return the length of the uncompressed text for a given revision"""
863 l = self.index[rev][2]
863 l = self.index[rev][2]
864 if l >= 0:
864 if l >= 0:
865 return l
865 return l
866
866
867 t = self.rawdata(rev)
867 t = self.rawdata(rev)
868 return len(t)
868 return len(t)
869
869
870 def size(self, rev):
870 def size(self, rev):
871 """length of non-raw text (processed by a "read" flag processor)"""
871 """length of non-raw text (processed by a "read" flag processor)"""
872 # fast path: if no "read" flag processor could change the content,
872 # fast path: if no "read" flag processor could change the content,
873 # size is rawsize. note: ELLIPSIS is known to not change the content.
873 # size is rawsize. note: ELLIPSIS is known to not change the content.
874 flags = self.flags(rev)
874 flags = self.flags(rev)
875 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
875 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
876 return self.rawsize(rev)
876 return self.rawsize(rev)
877
877
878 return len(self.revision(rev))
878 return len(self.revision(rev))
879
879
880 def fast_rank(self, rev):
880 def fast_rank(self, rev):
881 """Return the rank of a revision if already known, or None otherwise.
881 """Return the rank of a revision if already known, or None otherwise.
882
882
883 The rank of a revision is the size of the sub-graph it defines as a
883 The rank of a revision is the size of the sub-graph it defines as a
884 head. Equivalently, the rank of a revision `r` is the size of the set
884 head. Equivalently, the rank of a revision `r` is the size of the set
885 `ancestors(r)`, `r` included.
885 `ancestors(r)`, `r` included.
886
886
887 This method returns the rank retrieved from the revlog in constant
887 This method returns the rank retrieved from the revlog in constant
888 time. It makes no attempt at computing unknown values for versions of
888 time. It makes no attempt at computing unknown values for versions of
889 the revlog which do not persist the rank.
889 the revlog which do not persist the rank.
890 """
890 """
891 rank = self.index[rev][ENTRY_RANK]
891 rank = self.index[rev][ENTRY_RANK]
892 if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
892 if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
893 return None
893 return None
894 if rev == nullrev:
894 if rev == nullrev:
895 return 0 # convention
895 return 0 # convention
896 return rank
896 return rank
897
897
898 def chainbase(self, rev):
898 def chainbase(self, rev):
899 base = self._chainbasecache.get(rev)
899 base = self._chainbasecache.get(rev)
900 if base is not None:
900 if base is not None:
901 return base
901 return base
902
902
903 index = self.index
903 index = self.index
904 iterrev = rev
904 iterrev = rev
905 base = index[iterrev][3]
905 base = index[iterrev][3]
906 while base != iterrev:
906 while base != iterrev:
907 iterrev = base
907 iterrev = base
908 base = index[iterrev][3]
908 base = index[iterrev][3]
909
909
910 self._chainbasecache[rev] = base
910 self._chainbasecache[rev] = base
911 return base
911 return base
912
912
    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

        if self.canonical_parent_order and entry[5] == nullrev:
            return entry[6], entry[5]
        else:
            return entry[5], entry[6]

    # fast parentrevs(rev) where rev isn't filtered
    _uncheckedparentrevs = parentrevs

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        # inline node() to avoid function call overhead
        if self.canonical_parent_order and d[5] == self.nullid:
            return i[d[6]][7], i[d[5]][7]
        else:
            return i[d[5]][7], i[d[6]][7]

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

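    # Illustrative sketch (hypothetical values, reusing the 4 -> 2 -> 0
    # chain traced above): ``_chaininfo`` returns a pair of (chain length,
    # compressed bytes that must be read to rebuild the revision):
    #
    #   clen, compressed = rl._chaininfo(4)
    #   # clen == 2 deltas on top of the base; ``compressed`` sums
    #   # index[4][1] + index[2][1] + index[0][1] (the base is counted
    #   # because it has to be decompressed too)
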
    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped

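    # Usage sketch (illustrative; assumes a revlog ``rl``): rebuilding a
    # revision conceptually applies the stored chunks along this chain in
    # order:
    #
    #   chain, stopped = rl._deltachain(4)
    #   # chain == [0, 2, 4] for the example above and stopped is False;
    #   # chain[0] is a full snapshot unless ``stoprev`` cut the walk short
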
    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse revision order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        # first, make sure start revisions aren't filtered
        revs = list(revs)
        checkrev = self.node
        for r in revs:
            checkrev(r)
        # and we're sure ancestors aren't filtered as well

        if rustancestor is not None and self.index.rust_ext_compat:
            lazyancestors = rustancestor.LazyAncestors
            arg = self.index
        else:
            lazyancestors = ancestor.lazyancestors
            arg = self._uncheckedparentrevs
        return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)

    def descendants(self, revs):
        return dagop.descendantrevs(revs, self.revs, self.parentrevs)

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset:
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        if rustancestor is not None and self.index.rust_ext_compat:
            return rustancestor.MissingAncestors(self.index, common)
        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

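    # Illustrative sketch (assumes a revlog ``rl`` and hypothetical revs):
    # for a linear history 0-1-2-3, the revisions missing from one side are
    # exactly the ancestors of its heads minus the shared ancestors:
    #
    #   rl.findmissingrevs(common=[1], heads=[3])   # -> [2, 3]
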
    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [self.nullid]  # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return (
                [self.node(r) for r in self],
                [self.nullid],
                list(self.heads()),
            )
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == self.nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n)  # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update(
                            [p for p in self.parents(n) if p != self.nullid]
                        )
                    elif n in heads:  # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [self.nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the descendants set with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in heads.items() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

    def headrevs(self, revs=None):
        if revs is None:
            try:
                return self.index.headrevs()
            except AttributeError:
                return self._headrevs()
        if rustdagop is not None and self.index.rust_ext_compat:
            return rustdagop.headrevs(self.index, revs)
        return dagop.headrevs(revs, self._uncheckedparentrevs)

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]

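    # Minimal sketch of the same marking scheme on a toy parent table
    # (hypothetical data, independent of any revlog instance):
    #
    #   parents = {0: (-1, -1), 1: (0, -1), 2: (0, -1)}  # 1 and 2 fork from 0
    #   ishead = {r: 1 for r in parents}
    #   for r, (p1, p2) in parents.items():
    #       ishead[p1] = ishead[p2] = 0          # a parent is never a head
    #   heads = [r for r, v in ishead.items() if v]      # -> [1, 2]
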
    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [self.nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = {self.rev(n) for n in stop or []}

        revs = dagop.headrevssubset(
            self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
        )

        return [self.node(rev) for rev in revs]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError):  # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        reachableroots is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return bool(self.reachableroots(a, [b], [a], includepath=False))

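    # Usage sketch (illustrative; assumes a revlog ``rl``): because revision
    # numbers grow topologically, ``a > b`` can never be an ancestry
    # relation, which is why the cheap comparisons above run before the
    # reachableroots graph walk:
    #
    #   rl.isancestorrev(2, 5)   # walks the graph only because 2 <= 5
    #   rl.isancestorrev(5, 2)   # False immediately, no graph walk
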
    def reachableroots(self, minroot, heads, roots, includepath=False):
        """return (heads(::(<roots> and <roots>::<heads>)))

        If includepath is True, return (<roots>::<heads>)."""
        try:
            return self.index.reachableroots2(
                minroot, heads, roots, includepath
            )
        except AttributeError:
            return dagop._reachablerootspure(
                self.parentrevs, minroot, roots, heads, includepath
            )

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return self.nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == self.nodeconstants.nodelen:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node)  # quick search the index
                return node
            except error.LookupError:
                pass  # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if b"%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 2 * self.nodeconstants.nodelen:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (binascii.Error, error.LookupError):
                pass

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be full
        # hashes
        maybewdir = self.nodeconstants.wdirhex.startswith(id)
        ambiguous = False
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    ambiguous = True
                else:
                    return partial
            elif maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            else:
                return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                ambiguous = True
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key is not hex
            pass
        if ambiguous:
            raise error.AmbiguousPrefixLookupError(
                id, self.display_id, _(b'ambiguous identifier')
            )

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            # hex(node)[:...]
            l = len(id) // 2 * 2  # grab an even number of digits
            try:
                # we're dropping the last digit, so let's check that it's hex,
                # to avoid the expensive computation below if it's not
                if len(id) % 2 > 0:
                    if not (id[-1] in hexdigits):
                        return None
                prefix = bin(id[:l])
            except binascii.Error:
                pass
            else:
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [
                    n for n in nl if hex(n).startswith(id) and self.hasnode(n)
                ]
                if self.nodeconstants.nullhex.startswith(id):
                    nl.append(self.nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.display_id, _(b'ambiguous identifier')
                    )
                if maybewdir:
                    raise error.WdirUnsupported
                return None

    def lookup(self, id):
        """locate a node based on:
        - revision number or str(revision number)
        - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.display_id, _(b'no match found'))

    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""

        def isvalid(prefix):
            try:
                matchednode = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if matchednode is None:
                raise error.LookupError(node, self.display_id, _(b'no node'))
            return True

        def maybewdir(prefix):
            return all(c == b'f' for c in pycompat.iterbytestr(prefix))

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != self.nodeconstants.wdirid:
                    raise error.LookupError(
                        node, self.display_id, _(b'no node')
                    )
            except AttributeError:
                # Fall through to pure code
                pass

        if node == self.nodeconstants.wdirid:
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, len(hexnode) + 1):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

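    # Usage sketch (illustrative; assumes a revlog ``rl`` and a known node):
    #
    #   prefix = rl.shortest(node, minlength=4)
    #   # ``prefix`` is the shortest hex prefix of ``node`` (at least 4
    #   # digits) that _partialmatch resolves unambiguously, lengthened if
    #   # needed so it cannot be mistaken for the all-'f' wdir id
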
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self.index.entry_size
            end += (endrev + 1) * self.index.entry_size
        length = end - start

        return start, self._segmentfile.read_chunk(start, length, df)

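    # Illustrative arithmetic (hypothetical numbers): in an inline revlog
    # the index entries are interleaved with the data, so file offsets must
    # skip one index entry per revision plus the header entry. With
    # entry_size == 64 and a data offset of 100 for startrev 2:
    #
    #   start = 100 + (2 + 1) * 64   # == 292 bytes into the combined file
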
    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        compression_mode = self.index[rev][10]
        data = self._getsegmentforrevs(rev, rev, df=df)[1]
        if compression_mode == COMP_MODE_PLAIN:
            return data
        elif compression_mode == COMP_MODE_DEFAULT:
            return self._decompressor(data)
        elif compression_mode == COMP_MODE_INLINE:
            return self.decompress(data)
        else:
            msg = b'unknown compression mode %d'
            msg %= compression_mode
            raise error.RevlogError(msg)

    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self.index.entry_size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(
                self, revs, targetsize=targetsize
            )

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            # self._decompressor might be None, but will not be used in that
            # case
            def_decomp = self._decompressor
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                comp_mode = self.index[rev][10]
                c = buffer(data, chunkstart - offset, chunklength)
                if comp_mode == COMP_MODE_PLAIN:
                    ladd(c)
                elif comp_mode == COMP_MODE_INLINE:
                    ladd(decomp(c))
                elif comp_mode == COMP_MODE_DEFAULT:
                    ladd(def_decomp(c))
                else:
                    msg = b'unknown compression mode %d'
                    msg %= comp_mode
                    raise error.RevlogError(msg)

        return l

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

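    # Illustrative contrast (hypothetical index contents): under
    # generaldelta the stored base *is* the delta parent, while legacy
    # revlogs always delta against the previous revision:
    #
    #   # index[5][3] == 3, generaldelta on:   deltaparent(5) -> 3
    #   # index[5][3] == 3, generaldelta off:  deltaparent(5) -> 4
    #   # index[5][3] == 5 (its own base):     deltaparent(5) -> nullrev
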
    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        if not self._sparserevlog:
            return self.deltaparent(rev) == nullrev
        elif util.safehasattr(self.index, b'issnapshot'):
            # directly assign the method to cache the testing and access
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        while self.length(p1) == 0:
            b = self.deltaparent(p1)
            if b == p1:
                break
            p1 = b
        p2 = entry[6]
        while self.length(p2) == 0:
            b = self.deltaparent(p2)
            if b == p2:
                break
            p2 = b
        if base == p1 or base == p2:
            return False
        return self.issnapshot(base)

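    # Illustrative decision table (a sketch of the pure-Python sparse-revlog
    # path above, for hypothetical index entries with base = entry[3]):
    #
    #   base == rev        -> snapshot (stores the full text)
    #   base == nullrev    -> snapshot (delta against the empty text)
    #   base in {p1, p2}   -> regular delta, not a snapshot
    #   anything else      -> snapshot iff issnapshot(base), checked
    #                         recursively after skipping empty deltas
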
    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

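    # Snapshots are the revisions a delta chain bottoms out on: either full
    # text (delta base is nullrev) or, with sparse-revlog, an intermediate
    # snapshot whose base is another snapshot rather than one of its own
    # parents. A minimal sketch, assuming ``rl`` is an open revlog instance:
    #
    #   snapshots = [r for r in rl if rl.issnapshot(r)]
    #   # full snapshots have depth 0; intermediate snapshots are deeper
    #   depths = {r: rl.snapshotdepth(r) for r in snapshots}
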
    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))

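    # The fast path above reuses the stored delta only when ``rev2`` is
    # already stored as a delta against ``rev1``; otherwise the delta is
    # recomputed from the raw texts. A minimal sketch of the invariant,
    # assuming ``rl`` is an open revlog and ``base_rev``/``rev`` are valid
    # revisions:
    #
    #   delta = rl.revdiff(base_rev, rev)
    #   assert mdiff.patch(rl.rawdata(base_rev), delta) == rl.rawdata(rev)
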
    def revision(self, nodeorrev, _df=None):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        """
        return self._revisiondata(nodeorrev, _df)

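    # ``revision`` returns the fully processed text while ``rawdata`` (further
    # below) returns the bytes as stored, before flag processors run. A
    # minimal sketch, assuming ``rl`` is an open revlog with at least one
    # revision:
    #
    #   node = rl.node(0)
    #   text = rl.revision(node)  # flag processors applied
    #   raw = rl.rawdata(node)    # stored form; equals ``text`` when no
    #                             # special flags are set on the revision
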
    def sidedata(self, nodeorrev, _df=None):
        """a map of extra data related to the changeset but not part of the hash

        This function currently returns a dictionary. However, a more advanced
        mapping object will likely be used in the future for more
        efficient/lazy code.
        """
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
        else:
            rev = self.rev(nodeorrev)
        return self._sidedata(rev)

    def _revisiondata(self, nodeorrev, _df=None, raw=False):
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        # fast path the special `nullid` rev
        if node == self.nullid:
            return b""

        # ``rawtext`` is the text as stored inside the revlog. Might be the
        # revision or might need to be processed to retrieve the revision.
        rev, rawtext, validated = self._rawtext(node, rev, _df=_df)

        if raw and validated:
            # if we don't want to process the raw text and that raw
            # text is cached, we can exit early.
            return rawtext
        if rev is None:
            rev = self.rev(node)
        # the revlog's flags for this revision
        # (they usually alter its state or content)
        flags = self.flags(rev)

        if validated and flags == REVIDX_DEFAULT_FLAGS:
            # no extra flags set, no flag processor runs, text = rawtext
            return rawtext

        if raw:
            validatehash = flagutil.processflagsraw(self, rawtext, flags)
            text = rawtext
        else:
            r = flagutil.processflagsread(self, rawtext, flags)
            text, validatehash = r
        if validatehash:
            self.checkhash(text, node, rev=rev)
        if not validated:
            self._revisioncache = (node, rev, rawtext)

        return text

    def _rawtext(self, node, rev, _df=None):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)
        """

        # revision in the cache (could be useful to apply delta)
        cachedrev = None
        # An intermediate text to apply deltas to
        basetext = None

        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._revisioncache:
            if self._revisioncache[0] == node:
                return (rev, self._revisioncache[2], True)
            cachedrev = self._revisioncache[1]

        if rev is None:
            rev = self.rev(node)

        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            basetext = self._revisioncache[2]

        # drop cache to save memory, the caller is expected to
        # update self._revisioncache after validating the text
        self._revisioncache = None

        targetsize = None
        rawsize = self.index[rev][2]
        if 0 <= rawsize:
            targetsize = 4 * rawsize

        bins = self._chunks(chain, df=_df, targetsize=targetsize)
        if basetext is None:
            basetext = bytes(bins[0])
            bins = bins[1:]

        rawtext = mdiff.patches(basetext, bins)
        del basetext  # let us have a chance to free memory early
        return (rev, rawtext, False)

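    # Reconstruction walks the delta chain: the first chunk is a full
    # snapshot and every later chunk is a binary patch applied on top of it.
    # A minimal sketch of the same idea, assuming ``rl`` is an open revlog
    # and ``rev`` a valid revision:
    #
    #   chain, _stopped = rl._deltachain(rev)
    #   chunks = rl._chunks(chain)
    #   rawtext = mdiff.patches(bytes(chunks[0]), chunks[1:])
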
    def _sidedata(self, rev):
        """Return the sidedata for a given revision number."""
        index_entry = self.index[rev]
        sidedata_offset = index_entry[8]
        sidedata_size = index_entry[9]

        if self._inline:
            sidedata_offset += self.index.entry_size * (1 + rev)
        if sidedata_size == 0:
            return {}

        if self._docket.sidedata_end < sidedata_offset + sidedata_size:
            filename = self._sidedatafile
            end = self._docket.sidedata_end
            offset = sidedata_offset
            length = sidedata_size
            m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
            raise error.RevlogError(m)

        comp_segment = self._segmentfile_sidedata.read_chunk(
            sidedata_offset, sidedata_size
        )

        comp = self.index[rev][11]
        if comp == COMP_MODE_PLAIN:
            segment = comp_segment
        elif comp == COMP_MODE_DEFAULT:
            segment = self._decompressor(comp_segment)
        elif comp == COMP_MODE_INLINE:
            segment = self.decompress(comp_segment)
        else:
            msg = b'unknown compression mode %d'
            msg %= comp
            raise error.RevlogError(msg)

        sidedata = sidedatautil.deserialize_sidedata(segment)
        return sidedata

    def rawdata(self, nodeorrev, _df=None):
        """return the uncompressed raw data of a given node or revision number.

        _df - an existing file handle to read from. (internal-only)
        """
        return self._revisiondata(nodeorrev, _df, raw=True)

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

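    # The default implementation hashes both parent nodes (smaller one first)
    # followed by the text. A minimal standalone sketch of the same
    # computation, assuming 20-byte SHA-1 parent node ids:
    #
    #   import hashlib
    #
    #   def sketch_hashrevision(text, p1, p2):
    #       a, b = sorted([p1, p2])
    #       return hashlib.sha1(a + b + text).digest()
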
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if self._revisioncache and self._revisioncache[0] == node:
                    self._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(
                    _(b"integrity check failed on %s:%s")
                    % (self.display_id, pycompat.bytestr(revornode))
                )
        except error.RevlogError:
            if self._censorable and storageutil.iscensoredtext(text):
                raise error.CensoredNodeError(self.display_id, node, text)
            raise

    def _enforceinlinesize(self, tr):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        total_size = self.start(tiprev) + self.length(tiprev)
        if not self._inline or total_size < _maxinline:
            return

        troffset = tr.findoffset(self._indexfile)
        if troffset is None:
            raise error.RevlogError(
                _(b"%s not found in the transaction") % self._indexfile
            )
        trindex = None
        tr.add(self._datafile, 0)

        existing_handles = False
        if self._writinghandles is not None:
            existing_handles = True
            fp = self._writinghandles[0]
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None
            self._segmentfile.writing_handle = None
            # No need to deal with the sidedata writing handle as it is only
            # relevant for revlog-v2, which is never inline and so never
            # reaches this code.

        new_dfh = self._datafp(b'w+')
        new_dfh.truncate(0)  # drop any potentially existing data
        try:
            with self._indexfp() as read_ifh:
                for r in self:
                    new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
                    if (
                        trindex is None
                        and troffset
                        <= self.start(r) + r * self.index.entry_size
                    ):
                        trindex = r
                new_dfh.flush()

            if trindex is None:
                trindex = 0

            with self.__index_new_fp() as fp:
                self._format_flags &= ~FLAG_INLINE_DATA
                self._inline = False
                for i in self:
                    e = self.index.entry_binary(i)
                    if i == 0 and self._docket is None:
                        header = self._format_flags | self._format_version
                        header = self.index.pack_header(header)
                        e = header + e
                    fp.write(e)
                if self._docket is not None:
                    self._docket.index_end = fp.tell()

                # There is a small transactional race here. If the rename of
                # the index fails, we should remove the datafile. It is more
                # important to ensure that the data file is not truncated
                # when the index is replaced as otherwise data is lost.
                tr.replace(self._datafile, self.start(trindex))

                # the temp file replaces the real index when we exit the
                # context manager

            tr.replace(self._indexfile, trindex * self.index.entry_size)
            nodemaputil.setup_persistent_nodemap(tr, self)
            self._segmentfile = randomaccessfile.randomaccessfile(
                self.opener,
                self._datafile,
                self._chunkcachesize,
            )

            if existing_handles:
                # switched from inline to conventional; reopen the index
                ifh = self.__index_write_fp()
                self._writinghandles = (ifh, new_dfh, None)
                self._segmentfile.writing_handle = new_dfh
                new_dfh = None
                # No need to deal with the sidedata writing handle as it is
                # only relevant for revlog-v2, which is never inline and so
                # never reaches this code.
        finally:
            if new_dfh is not None:
                new_dfh.close()

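    # An inline revlog interleaves each data chunk with its index entry in
    # the ``.i`` file; once the accumulated data reaches ``_maxinline``
    # (128 KiB in this module) the revlog is split into separate ``.i`` and
    # ``.d`` files. A minimal sketch of the guard above, assuming ``rl`` is a
    # non-empty inline revlog:
    #
    #   tip = len(rl) - 1
    #   total_size = rl.start(tip) + rl.length(tip)
    #   must_split = rl._inline and total_size >= _maxinline
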
    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored."""

    @contextlib.contextmanager
    def reading(self):
        """Context manager that keeps data and sidedata files open for reading"""
        with self._segmentfile.reading():
            with self._segmentfile_sidedata.reading():
                yield

    @contextlib.contextmanager
    def _writing(self, transaction):
        if self._trypending:
            msg = b'try to write in a `trypending` revlog: %s'
            msg %= self.display_id
            raise error.ProgrammingError(msg)
        if self._writinghandles is not None:
            yield
        else:
            ifh = dfh = sdfh = None
            try:
                r = len(self)
                # opening the data file.
                dsize = 0
                if r:
                    dsize = self.end(r - 1)
                dfh = None
                if not self._inline:
                    try:
                        dfh = self._datafp(b"r+")
                        if self._docket is None:
                            dfh.seek(0, os.SEEK_END)
                        else:
                            dfh.seek(self._docket.data_end, os.SEEK_SET)
                    except FileNotFoundError:
                        dfh = self._datafp(b"w+")
                    transaction.add(self._datafile, dsize)
                if self._sidedatafile is not None:
                    # revlog-v2 does not inline, help Pytype
                    assert dfh is not None
                    try:
                        sdfh = self.opener(self._sidedatafile, mode=b"r+")
                        sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)
                    except FileNotFoundError:
                        sdfh = self.opener(self._sidedatafile, mode=b"w+")
                    transaction.add(
                        self._sidedatafile, self._docket.sidedata_end
                    )

                # opening the index file.
                isize = r * self.index.entry_size
                ifh = self.__index_write_fp()
                if self._inline:
                    transaction.add(self._indexfile, dsize + isize)
                else:
                    transaction.add(self._indexfile, isize)
                # expose all file handles for writing.
                self._writinghandles = (ifh, dfh, sdfh)
                self._segmentfile.writing_handle = ifh if self._inline else dfh
                self._segmentfile_sidedata.writing_handle = sdfh
                yield
                if self._docket is not None:
                    self._write_docket(transaction)
            finally:
                self._writinghandles = None
                self._segmentfile.writing_handle = None
                self._segmentfile_sidedata.writing_handle = None
                if dfh is not None:
                    dfh.close()
                if sdfh is not None:
                    sdfh.close()
                # closing the index file last to avoid exposing references to
                # potentially unflushed data content.
                if ifh is not None:
                    ifh.close()

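    # All mutation funnels through this context manager so the index, data,
    # and sidedata handles are opened, positioned, and registered with the
    # transaction once per batch of writes. A minimal usage sketch, assuming
    # ``rl`` is an open revlog, ``tr`` an active transaction, and
    # ``linkrev``/``p1node``/``p2node`` stand in for a real link revision and
    # existing parent nodes:
    #
    #   with rl._writing(tr):
    #       rl.addrevision(b'content', tr, linkrev, p1node, p2node)
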
    def _write_docket(self, transaction):
        """write the current docket on disk

        Exists as a method to help the changelog implement transaction logic.

        We could also imagine using the same transaction logic for all revlogs
        since dockets are cheap."""
        self._docket.write(transaction)

    def addrevision(
        self,
        text,
        transaction,
        link,
        p1,
        p2,
        cachedelta=None,
        node=None,
        flags=REVIDX_DEFAULT_FLAGS,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses might
            use a different hashing method (and override checkhash() in that
            case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
            multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(
                _(b"attempted to add linkrev -1 to %s") % self.display_id
            )

        if sidedata is None:
            sidedata = {}
        elif sidedata and not self.hassidedata:
            raise error.ProgrammingError(
                _(b"trying to add sidedata to a revlog that does not support them")
            )

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = flagutil.processflagswrite(self, text, flags)

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _(
                    b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
                )
                % (self.display_id, len(rawtext))
            )

        node = node or self.hash(rawtext, p1, p2)
        rev = self.index.get_rev(node)
        if rev is not None:
            return rev

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(
            rawtext,
            transaction,
            link,
            p1,
            p2,
            node,
            flags,
            cachedelta=cachedelta,
            deltacomputer=deltacomputer,
            sidedata=sidedata,
        )

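    # A minimal end-to-end sketch, assuming ``rl`` is an open revlog, ``tr``
    # an active transaction, ``linkrev`` a valid link revision, and
    # ``p1node``/``p2node`` existing parent nodes (``rl.nullid`` for a root
    # revision):
    #
    #   rev = rl.addrevision(b'file content', tr, linkrev, p1node, p2node)
    #   assert rl.revision(rl.node(rev)) == b'file content'
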
    def addrawrevision(
        self,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        node,
        flags,
        cachedelta=None,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (ex: received
        over the wire, or read from an external bundle).
        """
        with self._writing(transaction):
            return self._addrevision(
                node,
                rawtext,
                transaction,
                link,
                p1,
                p2,
                flags,
                cachedelta,
                deltacomputer=deltacomputer,
                sidedata=sidedata,
            )

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return b'', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return b'', compressed

        if data[0:1] == b'\0':
            return b'', data
        return b'u', data

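    # The first byte of a stored chunk routes decompression: b'u' marks
    # uncompressed data (stripped on read), b'\0' marks data that is safe to
    # store verbatim, and other values such as b'x' (zlib) identify a
    # compression engine. A minimal round-trip sketch, assuming ``rl`` is an
    # open revlog:
    #
    #   header, packed = rl.compress(b'some revision text')
    #   assert bytes(rl.decompress(header + packed)) == b'some revision text'
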
    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == b'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(
                    _(b'revlog decompress error: %s')
                    % stringutil.forcebytestr(e)
                )
        # '\0' is more common than 'u' so it goes first.
        elif t == b'\0':
            return data
        elif t == b'u':
            return util.buffer(data, 1)

        compressor = self._get_decompressor(t)

        return compressor.decompress(data)

    def _addrevision(
        self,
        node,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        flags,
        cachedelta,
        alwayscache=False,
        deltacomputer=None,
        sidedata=None,
    ):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == self.nullid:
            raise error.RevlogError(
                _(b"%s: attempt to add null revision") % self.display_id
            )
        if (
            node == self.nodeconstants.wdirid
            or node in self.nodeconstants.wdirfilenodeids
        ):
            raise error.RevlogError(
                _(b"%s: attempt to add wdir revision") % self.display_id
            )
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)

        if self._inline:
            fh = self._writinghandles[0]
        else:
            fh = self._writinghandles[1]

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1

        offset = self._get_data_offset(prev)

        if self._concurrencychecker:
            ifh, dfh, sdfh = self._writinghandles
            # XXX no checking for the sidedata file
            if self._inline:
                # offset is "as if" it were in the .d file, so we need to add on
                # the size of the entry metadata.
                self._concurrencychecker(
                    ifh, self._indexfile, offset + curr * self.index.entry_size
                )
            else:
                # Entries in the .i are a consistent size.
                self._concurrencychecker(
                    ifh, self._indexfile, curr * self.index.entry_size
                )
                self._concurrencychecker(dfh, self._datafile, offset)

        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(
                revlog.size(self, cachedelta[0]), cachedelta[1]
            )
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            write_debug = None
            if self._debug_delta:
                write_debug = transaction._report
            deltacomputer = deltautil.deltacomputer(
                self, write_debug=write_debug
            )

        if cachedelta is not None and len(cachedelta) == 2:
            # If the cached delta has no information about how it should be
            # reused, add the default reuse instruction according to the
            # revlog's configuration.
            if self._generaldelta and self._lazydeltabase:
                delta_base_reuse = DELTA_BASE_REUSE_TRY
            else:
                delta_base_reuse = DELTA_BASE_REUSE_NO
            cachedelta = (cachedelta[0], cachedelta[1], delta_base_reuse)

        revinfo = revlogutils.revisioninfo(
            node,
            p1,
            p2,
            btext,
            textlen,
            cachedelta,
            flags,
        )

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        compression_mode = COMP_MODE_INLINE
        if self._docket is not None:
            default_comp = self._docket.default_compression_header
            r = deltautil.delta_compression(default_comp, deltainfo)
            compression_mode, deltainfo = r

        sidedata_compression_mode = COMP_MODE_INLINE
        if sidedata and self.hassidedata:
            sidedata_compression_mode = COMP_MODE_PLAIN
            serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
            sidedata_offset = self._docket.sidedata_end
            h, comp_sidedata = self.compress(serialized_sidedata)
            if (
                h != b'u'
                and comp_sidedata[0:1] != b'\0'
                and len(comp_sidedata) < len(serialized_sidedata)
            ):
                assert not h
                if (
                    comp_sidedata[0:1]
                    == self._docket.default_compression_header
                ):
                    sidedata_compression_mode = COMP_MODE_DEFAULT
                    serialized_sidedata = comp_sidedata
                else:
                    sidedata_compression_mode = COMP_MODE_INLINE
                    serialized_sidedata = comp_sidedata
        else:
            serialized_sidedata = b""
            # Don't store the offset if the sidedata is empty, that way
            # we can easily detect empty sidedata and they will be no different
            # than ones we manually add.
            sidedata_offset = 0

        rank = RANK_UNKNOWN
        if self._compute_rank:
            if (p1r, p2r) == (nullrev, nullrev):
                rank = 1
            elif p1r != nullrev and p2r == nullrev:
                rank = 1 + self.fast_rank(p1r)
            elif p1r == nullrev and p2r != nullrev:
                rank = 1 + self.fast_rank(p2r)
            else:  # merge node
                if rustdagop is not None and self.index.rust_ext_compat:
                    rank = rustdagop.rank(self.index, p1r, p2r)
                else:
                    pmin, pmax = sorted((p1r, p2r))
                    rank = 1 + self.fast_rank(pmax)
                    rank += sum(1 for _ in self.findmissingrevs([pmax], [pmin]))

        e = revlogutils.entry(
            flags=flags,
            data_offset=offset,
            data_compressed_length=deltainfo.deltalen,
            data_uncompressed_length=textlen,
            data_compression_mode=compression_mode,
            data_delta_base=deltainfo.base,
            link_rev=link,
            parent_rev_1=p1r,
            parent_rev_2=p2r,
            node_id=node,
            sidedata_offset=sidedata_offset,
            sidedata_compressed_length=len(serialized_sidedata),
            sidedata_compression_mode=sidedata_compression_mode,
            rank=rank,
        )

        self.index.append(e)
        entry = self.index.entry_binary(curr)
        if curr == 0 and self._docket is None:
            header = self._format_flags | self._format_version
            header = self.index.pack_header(header)
            entry = header + entry
        self._writeentry(
            transaction,
            entry,
            deltainfo.data,
            link,
            offset,
            serialized_sidedata,
            sidedata_offset,
        )

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes:  # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return curr

    def _get_data_offset(self, prev):
        """Returns the current offset in the (in-transaction) data file.
        Versions < 2 of the revlog can get this in O(1); revlog v2 needs a
        docket file to store that information: since sidedata can be rewritten
        to the end of the data file within a transaction, you can have cases
        where, for example, rev `n` does not have sidedata while rev `n - 1`
        does, leading to `n - 1`'s sidedata being written after `n`'s data.

        TODO cache this in a docket file before getting out of experimental."""
        if self._docket is None:
            return self.end(prev)
        else:
            return self._docket.data_end

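    # A minimal sketch of the dispatch above, assuming ``rl`` is an open,
    # non-empty revlog: without a docket the next write position is derived
    # from the tip entry; with one (revlog-v2/changelog-v2) it is read from
    # the docket because sidedata rewrites can grow the data file past
    # ``end(tip)``.
    #
    #   def sketch_next_data_offset(rl):
    #       tip = len(rl) - 1
    #       return rl.end(tip) if rl._docket is None else rl._docket.data_end
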
    def _writeentry(
        self, transaction, entry, data, link, offset, sidedata, sidedata_offset
    ):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)
        ifh, dfh, sdfh = self._writinghandles
        if self._docket is None:
            ifh.seek(0, os.SEEK_END)
        else:
            ifh.seek(self._docket.index_end, os.SEEK_SET)
        if dfh:
            if self._docket is None:
                dfh.seek(0, os.SEEK_END)
            else:
                dfh.seek(self._docket.data_end, os.SEEK_SET)
        if sdfh:
            sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self._datafile, offset)
            if self._sidedatafile:
                transaction.add(self._sidedatafile, sidedata_offset)
            transaction.add(self._indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            if sidedata:
                sdfh.write(sidedata)
            ifh.write(entry)
        else:
            offset += curr * self.index.entry_size
            transaction.add(self._indexfile, offset)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            assert not sidedata
            self._enforceinlinesize(transaction)
        if self._docket is not None:
            # revlog-v2 always has 3 writing handles, help Pytype
            wh1 = self._writinghandles[0]
            wh2 = self._writinghandles[1]
            wh3 = self._writinghandles[2]
            assert wh1 is not None
            assert wh2 is not None
            assert wh3 is not None
            self._docket.index_end = wh1.tell()
            self._docket.data_end = wh2.tell()
            self._docket.sidedata_end = wh3.tell()

        nodemaputil.setup_persistent_nodemap(transaction, self)

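    # For an inline revlog the index entry and its data chunk live side by
    # side in the ``.i`` file, which is why the journal offset above adds
    # ``curr * self.index.entry_size`` to the data offset. A minimal sketch of
    # where an inline chunk starts, assuming ``rl`` is an open inline revlog:
    #
    #   def sketch_inline_chunk_start(rl, rev):
    #       # data offset plus all index entries stored up to and including rev
    #       return rl.start(rev) + (rev + 1) * rl.index.entry_size
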
    def addgroup(
        self,
        deltas,
        linkmapper,
        transaction,
        alwayscache=False,
        addrevisioncb=None,
        duplicaterevisioncb=None,
        debug_info=None,
        delta_base_reuse_policy=None,
    ):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        if self._adding_group:
            raise error.ProgrammingError(b'cannot nest addgroup() calls')

        # read the default delta-base reuse policy from revlog config if the
        # group did not specify one.
        if delta_base_reuse_policy is None:
            if self._generaldelta and self._lazydeltabase:
                delta_base_reuse_policy = DELTA_BASE_REUSE_TRY
            else:
                delta_base_reuse_policy = DELTA_BASE_REUSE_NO

        self._adding_group = True
        empty = True
        try:
            with self._writing(transaction):
                write_debug = None
                if self._debug_delta:
                    write_debug = transaction._report
                deltacomputer = deltautil.deltacomputer(
                    self,
                    write_debug=write_debug,
                    debug_info=debug_info,
                )
                # loop through our set of deltas
                for data in deltas:
                    (
                        node,
                        p1,
                        p2,
                        linknode,
                        deltabase,
                        delta,
                        flags,
                        sidedata,
                    ) = data
                    link = linkmapper(linknode)
                    flags = flags or REVIDX_DEFAULT_FLAGS

                    rev = self.index.get_rev(node)
                    if rev is not None:
                        # this can happen if two branches make the same change
                        self._nodeduplicatecallback(transaction, rev)
                        if duplicaterevisioncb:
                            duplicaterevisioncb(self, rev)
                        empty = False
                        continue

                    for p in (p1, p2):
                        if not self.index.has_node(p):
                            raise error.LookupError(
                                p, self.radix, _(b'unknown parent')
                            )

                    if not self.index.has_node(deltabase):
                        raise error.LookupError(
                            deltabase, self.display_id, _(b'unknown delta base')
                        )

                    baserev = self.rev(deltabase)

                    if baserev != nullrev and self.iscensored(baserev):
                        # if base is censored, delta must be full replacement in a
                        # single patch operation
                        hlen = struct.calcsize(b">lll")
                        oldlen = self.rawsize(baserev)
                        newlen = len(delta) - hlen
                        if delta[:hlen] != mdiff.replacediffheader(
                            oldlen, newlen
                        ):
                            raise error.CensoredBaseError(
                                self.display_id, self.node(baserev)
                            )

                    if not flags and self._peek_iscensored(baserev, delta):
                        flags |= REVIDX_ISCENSORED

                    # We assume consumers of addrevisioncb will want to retrieve
                    # the added revision, which will require a call to
                    # revision(). revision() will fast path if there is a cache
                    # hit. So, we tell _addrevision() to always cache in this case.
                    # We're only using addgroup() in the context of changegroup
                    # generation so the revision data can always be handled as raw
2761 # by the flagprocessor.
2761 # by the flagprocessor.
2762 rev = self._addrevision(
2762 rev = self._addrevision(
2763 node,
2763 node,
2764 None,
2764 None,
2765 transaction,
2765 transaction,
2766 link,
2766 link,
2767 p1,
2767 p1,
2768 p2,
2768 p2,
2769 flags,
2769 flags,
2770 (baserev, delta, delta_base_reuse_policy),
2770 (baserev, delta, delta_base_reuse_policy),
2771 alwayscache=alwayscache,
2771 alwayscache=alwayscache,
2772 deltacomputer=deltacomputer,
2772 deltacomputer=deltacomputer,
2773 sidedata=sidedata,
2773 sidedata=sidedata,
2774 )
2774 )
2775
2775
2776 if addrevisioncb:
2776 if addrevisioncb:
2777 addrevisioncb(self, rev)
2777 addrevisioncb(self, rev)
2778 empty = False
2778 empty = False
2779 finally:
2779 finally:
2780 self._adding_group = False
2780 self._adding_group = False
2781 return not empty
2781 return not empty
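
    # Illustrative sketch (not part of the original module): each element of
    # the ``deltas`` iterable consumed by addgroup() is an 8-tuple. Assuming
    # ``rl`` is this revlog, ``cl`` the changelog, and ``tr`` an open
    # transaction, a changegroup-style caller might look like::
    #
    #     def on_added(revlog, rev):
    #         pass  # e.g. collect new revs for progress reporting
    #
    #     deltas = [
    #         # (node, p1, p2, linknode, deltabase, delta, flags, sidedata)
    #         (node, p1, p2, linknode, deltabase, delta, 0, {}),
    #     ]
    #     changed = rl.addgroup(deltas, cl.rev, tr, addrevisioncb=on_added)
    #     # ``changed`` is True if at least one delta was added or was a
    #     # known duplicate; it is False only for an effectively empty group.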

    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

    def getstrippoint(self, minlink):
        """Find the minimum rev that must be stripped to strip linkrev ``minlink``.

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(
            minlink,
            len(self) - 1,
            self.headrevs(),
            self.linkrev,
            self.parentrevs,
        )
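
    # Hedged sketch: getstrippoint() only computes; it does not modify the
    # revlog. A caller pairing it with strip() might do::
    #
    #     rev, broken = rl.getstrippoint(minlink)
    #     if rev < len(rl):           # something must actually be stripped
    #         rl.strip(minlink, tr)   # truncates from ``rev`` onwards
    #     # ``broken`` holds the revs whose linkrevs the strip invalidates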

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        data_end = self.start(rev)
        if not self._inline:
            transaction.add(self._datafile, data_end)
            end = rev * self.index.entry_size
        else:
            end = data_end + (rev * self.index.entry_size)

        if self._sidedatafile:
            sidedata_end = self.sidedata_cut_off(rev)
            transaction.add(self._sidedatafile, sidedata_end)

        transaction.add(self._indexfile, end)
        if self._docket is not None:
            # XXX we could leverage the docket while stripping. However it is
            # not powerful enough at the time of this comment
            self._docket.index_end = end
            self._docket.data_end = data_end
            self._docket.sidedata_end = sidedata_end
            self._docket.write(transaction, stripping=True)

        # then reset internal state in memory to forget those revisions
        self._revisioncache = None
        self._chaininfocache = util.lrucachedict(500)
        self._segmentfile.clear_cache()
        self._segmentfile_sidedata.clear_cache()

        del self.index[rev:-1]
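
    # Worked example (illustrative numbers): for a non-inline revlog whose
    # index entries are 64 bytes each, stripping at rev == 10 truncates the
    # index to 10 * 64 == 640 bytes and the data file to self.start(10)
    # bytes. For an inline revlog, index entries and data interleave in one
    # file, so the truncation point is self.start(10) + 10 * 64 instead.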

    def checksize(self):
        """Check size of index and data files

        return a (dd, di) tuple.
        - dd: extra bytes for the "data" file
        - di: extra bytes for the "index" file

        A healthy revlog will return (0, 0).
        """
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, io.SEEK_END)
                actual = f.tell()
            dd = actual - expected
        except FileNotFoundError:
            dd = 0

        try:
            f = self.opener(self._indexfile)
            f.seek(0, io.SEEK_END)
            actual = f.tell()
            f.close()
            s = self.index.entry_size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except FileNotFoundError:
            di = 0

        return (dd, di)
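
    # Usage sketch: a healthy revlog reports no excess bytes. A consistency
    # checker (such as verifyintegrity below) can interpret the result as::
    #
    #     dd, di = rl.checksize()
    #     if dd:
    #         print('data file has %d unexpected trailing bytes' % dd)
    #     if di:
    #         print('index file has %d unexpected trailing bytes' % di)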

    def files(self):
        res = [self._indexfile]
        if self._docket_file is None:
            if not self._inline:
                res.append(self._datafile)
        else:
            res.append(self._docket_file)
            res.extend(self._docket.old_index_filepaths(include_empty=False))
            if self._docket.data_end:
                res.append(self._datafile)
            res.extend(self._docket.old_data_filepaths(include_empty=False))
            if self._docket.sidedata_end:
                res.append(self._sidedatafile)
            res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
        return res

    def emitrevisions(
        self,
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltamode=repository.CG_DELTAMODE_STD,
        sidedata_helpers=None,
        debug_info=None,
    ):
        if nodesorder not in (b'nodes', b'storage', b'linear', None):
            raise error.ProgrammingError(
                b'unhandled value for nodesorder: %s' % nodesorder
            )

        if nodesorder is None and not self._generaldelta:
            nodesorder = b'storage'

        if (
            not self._storedeltachains
            and deltamode != repository.CG_DELTAMODE_PREV
        ):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self,
            nodes,
            nodesorder,
            revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self.candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
            sidedata_helpers=sidedata_helpers,
            debug_info=debug_info,
        )
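
    # Hedged sketch: emitrevisions() yields ``revlogrevisiondelta`` objects.
    # A changegroup-style consumer (``process`` is an illustrative name)
    # could iterate them like::
    #
    #     for rev_delta in rl.emitrevisions(nodes, revisiondata=True):
    #         process(rev_delta.node, rev_delta.basenode, rev_delta.delta)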

    DELTAREUSEALWAYS = b'always'
    DELTAREUSESAMEREVS = b'samerevs'
    DELTAREUSENEVER = b'never'

    DELTAREUSEFULLADD = b'fulladd'

    DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}

    def clone(
        self,
        tr,
        destrevlog,
        addrevisioncb=None,
        deltareuse=DELTAREUSESAMEREVS,
        forcedeltabothparents=None,
        sidedata_helpers=None,
    ):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is the
          fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).
        DELTAREUSEFULLADD
          Revisions will be re-added as if they were new content. This is
          slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
          e.g. large file detection and handling.

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``forcedeltabothparents``
        argument controls whether to force compute deltas against both parents
        for merges. If unset, the destination revlog's current setting is used.

        See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
        `sidedata_helpers`.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(
                _(b'value for deltareuse invalid: %s') % deltareuse
            )

        if len(destrevlog):
            raise ValueError(_(b'destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_(b'source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_(b'destination revlog has filtered revisions'))

        # lazydelta and lazydeltabase control whether to reuse a cached delta,
        # if possible.
        oldlazydelta = destrevlog._lazydelta
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSENEVER:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = False

            destrevlog._deltabothparents = forcedeltabothparents or oldamd

            self._clone(
                tr,
                destrevlog,
                addrevisioncb,
                deltareuse,
                forcedeltabothparents,
                sidedata_helpers,
            )

        finally:
            destrevlog._lazydelta = oldlazydelta
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd
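
    # Usage sketch (illustrative): recomputing every delta while copying into
    # a freshly created, empty destination revlog::
    #
    #     src.clone(tr, dst, deltareuse=src.DELTAREUSENEVER)
    #
    # With DELTAREUSESAMEREVS (the default) the copy is faster, because
    # deltas are reused whenever the destination would pick the same base.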

    def _clone(
        self,
        tr,
        destrevlog,
        addrevisioncb,
        deltareuse,
        forcedeltabothparents,
        sidedata_helpers,
    ):
        """perform the core duty of `revlog.clone` after parameter processing"""
        write_debug = None
        if self._debug_delta:
            write_debug = tr._report
        deltacomputer = deltautil.deltacomputer(
            destrevlog,
            write_debug=write_debug,
        )
        index = self.index
        for rev in self:
            entry = index[rev]

            # Some classes override linkrev to take filtered revs into
            # account. Use raw entry from index.
            flags = entry[0] & 0xFFFF
            linkrev = entry[4]
            p1 = index[entry[5]][7]
            p2 = index[entry[6]][7]
            node = entry[7]

            # (Possibly) reuse the delta from the revlog if allowed and
            # the revlog chunk is a delta.
            cachedelta = None
            rawtext = None
            if deltareuse == self.DELTAREUSEFULLADD:
                text = self._revisiondata(rev)
                sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                destrevlog.addrevision(
                    text,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    cachedelta=cachedelta,
                    node=node,
                    flags=flags,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )
            else:
                if destrevlog._lazydelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                sidedata = None
                if not cachedelta:
                    rawtext = self._revisiondata(rev)
                    sidedata = self.sidedata(rev)
                if sidedata is None:
                    sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                with destrevlog._writing(tr):
                    destrevlog._addrevision(
                        node,
                        rawtext,
                        tr,
                        linkrev,
                        p1,
                        p2,
                        flags,
                        cachedelta,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

            if addrevisioncb:
                addrevisioncb(self, rev, node)

    def censorrevision(self, tr, censornode, tombstone=b''):
        if self._format_version == REVLOGV0:
            raise error.RevlogError(
                _(b'cannot censor with version %d revlogs')
                % self._format_version
            )
        elif self._format_version == REVLOGV1:
            rewrite.v1_censor(self, tr, censornode, tombstone)
        else:
            rewrite.v2_censor(self, tr, censornode, tombstone)

    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_(b'index contains %d extra bytes') % di)

        version = self._format_version

        # The verifier tells us what version revlog we should be.
        if version != state[b'expectedversion']:
            yield revlogproblem(
                warning=_(b"warning: '%s' uses revlog format %d; expected %d")
                % (self.display_id, version, state[b'expectedversion'])
            )

        state[b'skipread'] = set()
        state[b'safe_renamed'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta: file content starts with b'\1\n', the metadata
            #         header defined in filelog.py, but without a rename
            #   ext: content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  -------------------------------------------------------
            #  flags()              | 0      | 0      | 0     | not 0
            #  renamed()            | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n' | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "rawdata(rev)". "text"
            # mentioned below is "revision(rev)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see flagutil.addflagprocessor).
            #
            #              | common | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1     | L1     | L1    | L1
            # size()       | L1     | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2     | L2     | L2    | L2
            # len(text)    | L2     | L2     | L2    | L3
            # len(read())  | L2     | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            #  1. length check: L1 == L2, in all cases.
            #  2. hash check: depending on flag processor, we may need to
            #     use either "text" (external), or "rawtext" (in revlog).

            try:
                skipflags = state.get(b'skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                _verify_revision(self, skipflags, state, node)

                l1 = self.rawsize(rev)
                l2 = len(self.rawdata(node))

                if l1 != l2:
                    yield revlogproblem(
                        error=_(b'unpacked size is %d, %d expected') % (l2, l1),
                        node=node,
                    )

            except error.CensoredNodeError:
                if state[b'erroroncensored']:
                    yield revlogproblem(
                        error=_(b'censored file data'), node=node
                    )
                state[b'skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_(b'unpacking %s: %s')
                    % (short(node), stringutil.forcebytestr(e)),
                    node=node,
                )
                state[b'skipread'].add(node)
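
    # Hedged sketch: verifyintegrity() is a generator, so problems surface
    # only while iterating. A minimal driver, using the state keys read
    # above (``report`` is an illustrative callback)::
    #
    #     state = {b'expectedversion': rl._format_version,
    #              b'erroroncensored': True}
    #     for problem in rl.verifyintegrity(state):
    #         report(problem.error or problem.warning)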

    def storageinfo(
        self,
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
        d = {}

        if exclusivefiles:
            d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
            if not self._inline:
                d[b'exclusivefiles'].append((self.opener, self._datafile))

        if sharedfiles:
            d[b'sharedfiles'] = []

        if revisionscount:
            d[b'revisionscount'] = len(self)

        if trackedsize:
            d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d[b'storedsize'] = sum(
                self.opener.stat(path).st_size for path in self.files()
            )

        return d
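
    # Usage sketch: callers opt into each statistic explicitly, so the
    # returned dict only contains the requested keys::
    #
    #     info = rl.storageinfo(revisionscount=True, storedsize=True)
    #     # e.g. {b'revisionscount': 42, b'storedsize': 123456}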

    def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
        if not self.hassidedata:
            return
        # revlog formats with sidedata support do not support inline
        assert not self._inline
        if not helpers[1] and not helpers[2]:
            # Nothing to generate or remove
            return

        new_entries = []
        # append the new sidedata
        with self._writing(transaction):
            ifh, dfh, sdfh = self._writinghandles
            dfh.seek(self._docket.sidedata_end, os.SEEK_SET)

            current_offset = sdfh.tell()
            for rev in range(startrev, endrev + 1):
                entry = self.index[rev]
                new_sidedata, flags = sidedatautil.run_sidedata_helpers(
                    store=self,
                    sidedata_helpers=helpers,
                    sidedata={},
                    rev=rev,
                )

                serialized_sidedata = sidedatautil.serialize_sidedata(
                    new_sidedata
                )

                sidedata_compression_mode = COMP_MODE_INLINE
                if serialized_sidedata and self.hassidedata:
                    sidedata_compression_mode = COMP_MODE_PLAIN
                    h, comp_sidedata = self.compress(serialized_sidedata)
                    if (
                        h != b'u'
                        and comp_sidedata[0] != b'\0'
                        and len(comp_sidedata) < len(serialized_sidedata)
                    ):
                        assert not h
                        if (
                            comp_sidedata[0]
                            == self._docket.default_compression_header
                        ):
                            sidedata_compression_mode = COMP_MODE_DEFAULT
                            serialized_sidedata = comp_sidedata
                        else:
                            sidedata_compression_mode = COMP_MODE_INLINE
                            serialized_sidedata = comp_sidedata
                if entry[8] != 0 or entry[9] != 0:
                    # rewriting entries that already have sidedata is not
                    # supported yet, because it introduces garbage data in the
                    # revlog.
                    msg = b"rewriting existing sidedata is not supported yet"
                    raise error.Abort(msg)

                # Apply (potential) flags to add and to remove after running
                # the sidedata helpers
                new_offset_flags = entry[0] | flags[0] & ~flags[1]
                entry_update = (
                    current_offset,
                    len(serialized_sidedata),
                    new_offset_flags,
                    sidedata_compression_mode,
                )

                # the sidedata computation might have moved the file cursors
                # around
                sdfh.seek(current_offset, os.SEEK_SET)
                sdfh.write(serialized_sidedata)
                new_entries.append(entry_update)
                current_offset += len(serialized_sidedata)
            self._docket.sidedata_end = sdfh.tell()

            # rewrite the new index entries
            ifh.seek(startrev * self.index.entry_size)
            for i, e in enumerate(new_entries):
                rev = startrev + i
                self.index.replace_sidedata_info(rev, *e)
                packed = self.index.entry_binary(rev)
                if rev == 0 and self._docket is None:
                    header = self._format_flags | self._format_version
                    header = self.index.pack_header(header)
                    packed = header + packed
                ifh.write(packed)