safehasattr: pass attribute name as string instead of bytes...
marmoute
r51486:5ae12431 default
@@ -1,3406 +1,3406 @@
# revlog.py - storage back-end for mercurial
# coding: utf8
#
# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""


import binascii
import collections
import contextlib
import io
import os
import struct
import weakref
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullrev,
    sha1nodeconstants,
    short,
    wdirrev,
)
from .i18n import _
from .pycompat import getattr
from .revlogutils.constants import (
    ALL_KINDS,
    CHANGELOGV2,
    COMP_MODE_DEFAULT,
    COMP_MODE_INLINE,
    COMP_MODE_PLAIN,
    DELTA_BASE_REUSE_NO,
    DELTA_BASE_REUSE_TRY,
    ENTRY_RANK,
    FEATURES_BY_VERSION,
    FLAG_GENERALDELTA,
    FLAG_INLINE_DATA,
    INDEX_HEADER,
    KIND_CHANGELOG,
    KIND_FILELOG,
    RANK_UNKNOWN,
    REVLOGV0,
    REVLOGV1,
    REVLOGV1_FLAGS,
    REVLOGV2,
    REVLOGV2_FLAGS,
    REVLOG_DEFAULT_FLAGS,
    REVLOG_DEFAULT_FORMAT,
    REVLOG_DEFAULT_VERSION,
    SUPPORTED_FLAGS,
)
from .revlogutils.flagutil import (
    REVIDX_DEFAULT_FLAGS,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_FLAGS_ORDER,
    REVIDX_HASCOPIESINFO,
    REVIDX_ISCENSORED,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
)
from .thirdparty import attr
from . import (
    ancestor,
    dagop,
    error,
    mdiff,
    policy,
    pycompat,
    revlogutils,
    templatefilters,
    util,
)
from .interfaces import (
    repository,
    util as interfaceutil,
)
from .revlogutils import (
    deltas as deltautil,
    docket as docketutil,
    flagutil,
    nodemap as nodemaputil,
    randomaccessfile,
    revlogv0,
    rewrite,
    sidedata as sidedatautil,
)
from .utils import (
    storageutil,
    stringutil,
)

# blanked usage of all the names to prevent pyflakes constraints
# We need these names available in the module for extensions.

REVLOGV0
REVLOGV1
REVLOGV2
CHANGELOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_HASCOPIESINFO
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_RAWTEXT_CHANGING_FLAGS

parsers = policy.importmod('parsers')
rustancestor = policy.importrust('ancestor')
rustdagop = policy.importrust('dagop')
rustrevlog = policy.importrust('revlog')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of inline data embedded into a revlog
_maxinline = 131072

# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    return text, False


def ellipsiswriteprocessor(rl, text):
    return text, False


def ellipsisrawprocessor(rl, text):
    return False


ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)
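# Flag processors are registered as (read, write, raw) triples. The read
# and write transforms return a (text, validatehash) pair; the raw one
# reports whether the rawtext may be checked against the stored hash. For
# ellipsis revisions the text passes through untouched and hash
# verification is skipped, since an ellipsis revision (as used by narrow
# clones) may not hash to its recorded nodeid.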


def _verify_revision(rl, skipflags, state, node):
    """Verify the integrity of the given revlog ``node`` while providing a hook
    point for extensions to influence the operation."""
    if skipflags:
        state[b'skipread'].add(node)
    else:
        # Side-effect: read content and verify hash.
        rl.revision(node)


# True if a fast implementation for persistent-nodemap is available
#
# We also consider the "pure" python implementation "fast" because people
# using pure python don't really have performance considerations (and a
# wheelbarrow of other slowness sources)
HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
    parsers, 'BaseIndexObject'
)


@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta:
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    sidedata = attr.ib()
    protocol_flags = attr.ib()
    linknode = attr.ib(default=None)


@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem:
    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)


def parse_index_v1(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline)
    return index, cache


def parse_index_v2(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline, format=REVLOGV2)
    return index, cache


def parse_index_cl_v2(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline, format=CHANGELOGV2)
    return index, cache


if util.safehasattr(parsers, 'parse_index_devel_nodemap'):

    def parse_index_v1_nodemap(data, inline):
        index, cache = parsers.parse_index_devel_nodemap(data, inline)
        return index, cache


else:
    parse_index_v1_nodemap = None


def parse_index_v1_mixed(data, inline):
    index, cache = parse_index_v1(data, inline)
    return rustrevlog.MixedIndex(index), cache
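
# MixedIndex wraps the C-parsed index in a Rust object; this lets Rust
# code paths (such as the persistent nodemap) operate on the index while
# delegating everything else to the C implementation underneath.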


# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7FFFFFFF

FILE_TOO_SHORT_MSG = _(
    b'cannot read from revlog %s;'
    b' expected %d bytes from offset %d, data size is %d'
)

hexdigits = b'0123456789abcdefABCDEF'


class revlog:
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.

    If `upperboundcomp` is not None, this is the expected maximal gain from
    compression for the data content.

    `concurrencychecker` is an optional function that receives 3 arguments: a
    file handle, a filename, and an expected position. It should check whether
    the current position in the file handle is valid, and log/warn/fail (by
    raising).

    See mercurial/revlogutils/constants.py for details about the content of an
    index entry.
    """

    _flagserrorclass = error.RevlogError

    def __init__(
        self,
        opener,
        target,
        radix,
        postfix=None,  # only exists for `tmpcensored` now
        checkambig=False,
        mmaplargeindex=False,
        censorable=False,
        upperboundcomp=None,
        persistentnodemap=False,
        concurrencychecker=None,
        trypending=False,
        try_split=False,
        canonical_parent_order=True,
    ):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.

        `target`: a (KIND, ID) tuple that identifies the content stored in
        this revlog. It helps the rest of the code to understand what the
        revlog is about without having to resort to heuristics and index
        filename analysis. Note that this must reliably be set by normal
        code, but that test, debug, or performance measurement code might
        not set it to an accurate value.
        """
        self.upperboundcomp = upperboundcomp

        self.radix = radix

        self._docket_file = None
        self._indexfile = None
        self._datafile = None
        self._sidedatafile = None
        self._nodemap_file = None
        self.postfix = postfix
        self._trypending = trypending
        self._try_split = try_split
        self.opener = opener
        if persistentnodemap:
            self._nodemap_file = nodemaputil.get_nodemap_file(self)

        assert target[0] in ALL_KINDS
        assert len(target) == 2
        self.target = target
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        self._mmaplargeindex = mmaplargeindex
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._revisioncache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, b'')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._deltabothparents = True
        self._candidate_group_chunk_size = 0
        self._debug_delta = False
        self.index = None
        self._docket = None
        self._nodemap_docket = None
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._compengine = b'zlib'
        self._compengineopts = {}
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._sparserevlog = False
        self.hassidedata = False
        self._srdensitythreshold = 0.50
        self._srmingapsize = 262144
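        # Sparse-read tuning: when reading a run of revisions, gaps of unused
        # bytes larger than ``_srmingapsize`` (256 KiB) can be skipped, as
        # long as the useful-data density of each resulting read slice stays
        # above ``_srdensitythreshold`` (50%). The slicing logic itself lives
        # in revlogutils.deltas.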

        # other optional features

        # might remove rank configuration once the computation has no impact
        self._compute_rank = False

        # Make copy of flag processors so each revlog instance can support
        # custom flags.
        self._flagprocessors = dict(flagutil.flagprocessors)

        # 3-tuple of file handles being used for active writing.
        self._writinghandles = None
        # prevent nesting of addgroup
        self._adding_group = None

        self._loadindex()

        self._concurrencychecker = concurrencychecker

        # parent order is supposed to be semantically irrelevant, so we
        # normally resort parents to ensure that the first parent is non-null,
        # if there is a non-null parent at all.
        # filelog abuses the parent order as a flag to mark some instances of
        # meta-encoded files, so allow it to disable this behavior.
        self.canonical_parent_order = canonical_parent_order

    def _init_opts(self):
        """process options (from above/config) to set up associated default revlog mode

        These values might be affected when actually reading on-disk information.

        The relevant values are returned for use in _loadindex().

        * newversionflags:
            version header to use if we need to create a new revlog

        * mmapindexthreshold:
            minimal index size at which to start using mmap

        * force_nodemap:
            force the usage of a "development" version of the nodemap code
        """
        mmapindexthreshold = None
        opts = self.opener.options

        if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
            new_header = CHANGELOGV2
            self._compute_rank = opts.get(b'changelogv2.compute-rank', True)
        elif b'revlogv2' in opts:
            new_header = REVLOGV2
        elif b'revlogv1' in opts:
            new_header = REVLOGV1 | FLAG_INLINE_DATA
            if b'generaldelta' in opts:
                new_header |= FLAG_GENERALDELTA
        elif b'revlogv0' in self.opener.options:
            new_header = REVLOGV0
        else:
            new_header = REVLOG_DEFAULT_VERSION

        if b'chunkcachesize' in opts:
            self._chunkcachesize = opts[b'chunkcachesize']
        if b'maxchainlen' in opts:
            self._maxchainlen = opts[b'maxchainlen']
        if b'deltabothparents' in opts:
            self._deltabothparents = opts[b'deltabothparents']
        dps_cgds = opts.get(b'delta-parent-search.candidate-group-chunk-size')
        if dps_cgds:
            self._candidate_group_chunk_size = dps_cgds
        self._lazydelta = bool(opts.get(b'lazydelta', True))
        self._lazydeltabase = False
        if self._lazydelta:
            self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
        if b'debug-delta' in opts:
            self._debug_delta = opts[b'debug-delta']
        if b'compengine' in opts:
            self._compengine = opts[b'compengine']
        if b'zlib.level' in opts:
            self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
        if b'zstd.level' in opts:
            self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
        if b'maxdeltachainspan' in opts:
            self._maxdeltachainspan = opts[b'maxdeltachainspan']
        if self._mmaplargeindex and b'mmapindexthreshold' in opts:
            mmapindexthreshold = opts[b'mmapindexthreshold']
        self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
        withsparseread = bool(opts.get(b'with-sparse-read', False))
        # sparse-revlog forces sparse-read
        self._withsparseread = self._sparserevlog or withsparseread
        if b'sparse-read-density-threshold' in opts:
            self._srdensitythreshold = opts[b'sparse-read-density-threshold']
        if b'sparse-read-min-gap-size' in opts:
            self._srmingapsize = opts[b'sparse-read-min-gap-size']
        if opts.get(b'enableellipsis'):
            self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

        # revlog v0 doesn't have flag processors
        for flag, processor in opts.get(b'flagprocessors', {}).items():
            flagutil.insertflagprocessor(flag, processor, self._flagprocessors)

        if self._chunkcachesize <= 0:
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not greater than 0')
                % self._chunkcachesize
            )
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not a power of 2')
                % self._chunkcachesize
            )
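        # For n > 0, ``n & (n - 1)`` clears the lowest set bit, so the
        # expression above is zero exactly when n is a power of two; any
        # cache size with more than one bit set is rejected.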
        force_nodemap = opts.get(b'devel-force-nodemap', False)
        return new_header, mmapindexthreshold, force_nodemap

    def _get_data(self, filepath, mmap_threshold, size=None):
        """return a file's content, with or without mmap

        If the file is missing return the empty string"""
        try:
            with self.opener(filepath) as fp:
                if mmap_threshold is not None:
                    file_size = self.opener.fstat(fp).st_size
                    if file_size >= mmap_threshold:
                        if size is not None:
                            # avoid potential mmap crash
                            size = min(file_size, size)
                        # TODO: should .close() to release resources without
                        # relying on Python GC
                        if size is None:
                            return util.buffer(util.mmapread(fp))
                        else:
                            return util.buffer(util.mmapread(fp, size))
                if size is None:
                    return fp.read()
                else:
                    return fp.read(size)
        except FileNotFoundError:
            return b''
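
    # ``_get_data`` reads the whole file into memory below the threshold and
    # returns an mmap-backed buffer at or above it; a missing file yields
    # b''. A hypothetical call (the threshold value is illustrative, not a
    # default of this module):
    #
    #   data = self._get_data(self._indexfile, mmap_threshold=1024 * 1024)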

    def _loadindex(self, docket=None):

        new_header, mmapindexthreshold, force_nodemap = self._init_opts()

        if self.postfix is not None:
            entry_point = b'%s.i.%s' % (self.radix, self.postfix)
        elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
            entry_point = b'%s.i.a' % self.radix
        elif self._try_split and self.opener.exists(b'%s.i.s' % self.radix):
            entry_point = b'%s.i.s' % self.radix
        else:
            entry_point = b'%s.i' % self.radix

        if docket is not None:
            self._docket = docket
            self._docket_file = entry_point
        else:
            self._initempty = True
            entry_data = self._get_data(entry_point, mmapindexthreshold)
            if len(entry_data) > 0:
                header = INDEX_HEADER.unpack(entry_data[:4])[0]
                self._initempty = False
            else:
                header = new_header

            self._format_flags = header & ~0xFFFF
            self._format_version = header & 0xFFFF

            supported_flags = SUPPORTED_FLAGS.get(self._format_version)
            if supported_flags is None:
                msg = _(b'unknown version (%d) in revlog %s')
                msg %= (self._format_version, self.display_id)
                raise error.RevlogError(msg)
            elif self._format_flags & ~supported_flags:
                msg = _(b'unknown flags (%#04x) in version %d revlog %s')
                display_flag = self._format_flags >> 16
                msg %= (display_flag, self._format_version, self.display_id)
                raise error.RevlogError(msg)

            features = FEATURES_BY_VERSION[self._format_version]
            self._inline = features[b'inline'](self._format_flags)
            self._generaldelta = features[b'generaldelta'](self._format_flags)
            self.hassidedata = features[b'sidedata']

            if not features[b'docket']:
                self._indexfile = entry_point
                index_data = entry_data
            else:
                self._docket_file = entry_point
                if self._initempty:
                    self._docket = docketutil.default_docket(self, header)
                else:
                    self._docket = docketutil.parse_docket(
                        self, entry_data, use_pending=self._trypending
                    )

        if self._docket is not None:
            self._indexfile = self._docket.index_filepath()
            index_data = b''
            index_size = self._docket.index_end
            if index_size > 0:
                index_data = self._get_data(
                    self._indexfile, mmapindexthreshold, size=index_size
                )
                if len(index_data) < index_size:
                    msg = _(b'too few index data for %s: got %d, expected %d')
                    msg %= (self.display_id, len(index_data), index_size)
                    raise error.RevlogError(msg)

            self._inline = False
            # generaldelta implied by version 2 revlogs.
            self._generaldelta = True
            # the logic for persistent nodemap will be dealt with within the
            # main docket, so disable it for now.
            self._nodemap_file = None

        if self._docket is not None:
            self._datafile = self._docket.data_filepath()
            self._sidedatafile = self._docket.sidedata_filepath()
        elif self.postfix is None:
            self._datafile = b'%s.d' % self.radix
        else:
            self._datafile = b'%s.d.%s' % (self.radix, self.postfix)

        self.nodeconstants = sha1nodeconstants
        self.nullid = self.nodeconstants.nullid

        # sparse-revlog can't be on without general-delta (issue6056)
        if not self._generaldelta:
            self._sparserevlog = False

        self._storedeltachains = True

        devel_nodemap = (
            self._nodemap_file
            and force_nodemap
            and parse_index_v1_nodemap is not None
        )

        use_rust_index = False
        if rustrevlog is not None:
            if self._nodemap_file is not None:
                use_rust_index = True
            else:
                use_rust_index = self.opener.options.get(b'rust.index')

        self._parse_index = parse_index_v1
        if self._format_version == REVLOGV0:
            self._parse_index = revlogv0.parse_index_v0
        elif self._format_version == REVLOGV2:
            self._parse_index = parse_index_v2
        elif self._format_version == CHANGELOGV2:
            self._parse_index = parse_index_cl_v2
        elif devel_nodemap:
            self._parse_index = parse_index_v1_nodemap
        elif use_rust_index:
            self._parse_index = parse_index_v1_mixed
        try:
            d = self._parse_index(index_data, self._inline)
            index, chunkcache = d
            use_nodemap = (
                not self._inline
                and self._nodemap_file is not None
                and util.safehasattr(index, 'update_nodemap_data')
            )
            if use_nodemap:
                nodemap_data = nodemaputil.persisted_data(self)
                if nodemap_data is not None:
                    docket = nodemap_data[0]
                    if (
                        len(d[0]) > docket.tip_rev
                        and d[0][docket.tip_rev][7] == docket.tip_node
                    ):
                        # no changelog tampering
                        self._nodemap_docket = docket
                        index.update_nodemap_data(*nodemap_data)
        except (ValueError, IndexError):
            raise error.RevlogError(
                _(b"index %s is corrupted") % self.display_id
            )
        self.index = index
        self._segmentfile = randomaccessfile.randomaccessfile(
            self.opener,
            (self._indexfile if self._inline else self._datafile),
            self._chunkcachesize,
            chunkcache,
        )
        self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
            self.opener,
            self._sidedatafile,
            self._chunkcachesize,
        )
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = util.lrucachedict(500)
        # revlog header -> revlog compressor
        self._decompressors = {}

    @util.propertycache
    def revlog_kind(self):
        return self.target[0]

    @util.propertycache
    def display_id(self):
        """The public facing "ID" of the revlog that we use in messages"""
        if self.revlog_kind == KIND_FILELOG:
            # Reference the file without the "data/" prefix, so it is familiar
            # to the user.
            return self.target[1]
        else:
            return self.radix

    def _get_decompressor(self, t):
        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor(self._compengineopts)
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(
                    _(b'unknown compression type %s') % binascii.hexlify(t)
                )
        return compressor
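
    # By revlog convention, the first byte(s) of a stored chunk identify the
    # compression engine (e.g. b'x' for zlib, b'u' for data stored
    # uncompressed); ``forrevlogheader`` resolves such a header to the
    # engine registered for it.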

    @util.propertycache
    def _compressor(self):
        engine = util.compengines[self._compengine]
        return engine.revlogcompressor(self._compengineopts)

    @util.propertycache
    def _decompressor(self):
        """the default decompressor"""
        if self._docket is None:
            return None
        t = self._docket.default_compression_header
        c = self._get_decompressor(t)
        return c.decompress

    def _indexfp(self):
        """file object for the revlog's index file"""
        return self.opener(self._indexfile, mode=b"r")

    def __index_write_fp(self):
        # You should not use this directly and use `_writing` instead
        try:
            f = self.opener(
                self._indexfile, mode=b"r+", checkambig=self._checkambig
            )
            if self._docket is None:
                f.seek(0, os.SEEK_END)
            else:
                f.seek(self._docket.index_end, os.SEEK_SET)
            return f
        except FileNotFoundError:
            return self.opener(
                self._indexfile, mode=b"w+", checkambig=self._checkambig
            )

    def __index_new_fp(self):
        # You should not use this unless you are upgrading from inline revlog
        return self.opener(
            self._indexfile,
            mode=b"w",
            checkambig=self._checkambig,
            atomictemp=True,
        )

    def _datafp(self, mode=b'r'):
        """file object for the revlog's data file"""
        return self.opener(self._datafile, mode=mode)

    @contextlib.contextmanager
    def _sidedatareadfp(self):
        """file object suitable to read sidedata"""
        if self._writinghandles:
            yield self._writinghandles[2]
        else:
            with self.opener(self._sidedatafile) as fp:
                yield fp

    def tiprev(self):
        return len(self.index) - 1

    def tip(self):
        return self.node(self.tiprev())

    def __contains__(self, rev):
        return 0 <= rev < len(self)

    def __len__(self):
        return len(self.index)

    def __iter__(self):
        return iter(range(len(self)))

    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        return storageutil.iterrevs(len(self), start=start, stop=stop)

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such a flag processor can
        # alter the rawtext content that the delta will be based on, and two
        # clients could have the same revlog node with different flags (i.e.
        # different rawtext contents) and the delta could be incompatible.
        if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
            self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
        ):
            return False
        return True

    def update_caches(self, transaction):
        if self._nodemap_file is not None:
            if transaction is None:
                nodemaputil.update_persistent_nodemap(self)
            else:
                nodemaputil.setup_persistent_nodemap(transaction, self)

    def clearcaches(self):
        self._revisioncache = None
        self._chainbasecache.clear()
        self._segmentfile.clear_cache()
        self._segmentfile_sidedata.clear_cache()
        self._pcache = {}
        self._nodemap_docket = None
        self.index.clearcaches()
        # The python code is responsible for validating the docket, so we
        # end up having to refresh it here.
        use_nodemap = (
            not self._inline
            and self._nodemap_file is not None
            and util.safehasattr(self.index, 'update_nodemap_data')
        )
        if use_nodemap:
            nodemap_data = nodemaputil.persisted_data(self)
            if nodemap_data is not None:
                self._nodemap_docket = nodemap_data[0]
                self.index.update_nodemap_data(*nodemap_data)

    def rev(self, node):
        try:
            return self.index.rev(node)
        except TypeError:
            raise
        except error.RevlogError:
            # parsers.c radix tree lookup failed
            if (
                node == self.nodeconstants.wdirid
                or node in self.nodeconstants.wdirfilenodeids
            ):
                raise error.WdirUnsupported
            raise error.LookupError(node, self.display_id, _(b'no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
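    # Spelled out, the index entry fields used by the accessors below are:
    #   e[0]  offset (<<16) | flags     e[1]  compressed length
    #   e[2]  rawsize                   e[3]  delta chain base rev
    #   e[4]  linkrev                   e[5]  first parent rev
    #   e[6]  second parent rev         e[7]  nodeid
    #   e[8]  sidedata offset           e[9]  sidedata length
    # Sidedata fields only carry data for v2/changelogv2 revlogs, and the
    # rank field (``ENTRY_RANK``) only for changelogv2.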
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def sidedata_cut_off(self, rev):
        sd_cut_off = self.index[rev][8]
        if sd_cut_off != 0:
            return sd_cut_off
        # This is some annoying dance, because entries without sidedata
        # currently use 0 as their offset. (instead of previous-offset +
        # previous-size)
        #
        # We should reconsider this sidedata → 0 sidedata_offset policy.
        # In the meantime, we need this.
        while 0 <= rev:
            e = self.index[rev]
            if e[9] != 0:
                return e[8] + e[9]
            rev -= 1
        return 0

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]

    def sidedata_length(self, rev):
        if not self.hassidedata:
            return 0
        return self.index[rev][9]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.rawdata(rev)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev))

    def fast_rank(self, rev):
        """Return the rank of a revision if already known, or None otherwise.

        The rank of a revision is the size of the sub-graph it defines as a
        head. Equivalently, the rank of a revision `r` is the size of the set
        `ancestors(r)`, `r` included.

        This method returns the rank retrieved from the revlog in constant
        time. It makes no attempt at computing unknown values for versions of
        the revlog which do not persist the rank.
        """
        rank = self.index[rev][ENTRY_RANK]
        if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
            return None
        if rev == nullrev:
            return 0  # convention
        return rank

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

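        # Walk the delta-base field (entry[3]) until it points at itself: a
        # self-referential base marks the start of the chain, i.e. a revision
        # stored as a full snapshot rather than as a delta.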
        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

929 if self.canonical_parent_order and entry[5] == nullrev:
929 if self.canonical_parent_order and entry[5] == nullrev:
930 return entry[6], entry[5]
930 return entry[6], entry[5]
931 else:
931 else:
932 return entry[5], entry[6]
932 return entry[5], entry[6]
933
933
934 # fast parentrevs(rev) where rev isn't filtered
934 # fast parentrevs(rev) where rev isn't filtered
935 _uncheckedparentrevs = parentrevs
935 _uncheckedparentrevs = parentrevs
936
936
937 def node(self, rev):
937 def node(self, rev):
938 try:
938 try:
939 return self.index[rev][7]
939 return self.index[rev][7]
940 except IndexError:
940 except IndexError:
941 if rev == wdirrev:
941 if rev == wdirrev:
942 raise error.WdirUnsupported
942 raise error.WdirUnsupported
943 raise
943 raise
944
944
945 # Derived from index values.
945 # Derived from index values.
946
946
947 def end(self, rev):
947 def end(self, rev):
948 return self.start(rev) + self.length(rev)
948 return self.start(rev) + self.length(rev)
949
949
950 def parents(self, node):
950 def parents(self, node):
951 i = self.index
951 i = self.index
952 d = i[self.rev(node)]
952 d = i[self.rev(node)]
953 # inline node() to avoid function call overhead
953 # inline node() to avoid function call overhead
954 if self.canonical_parent_order and d[5] == self.nullid:
954 if self.canonical_parent_order and d[5] == self.nullid:
955 return i[d[6]][7], i[d[5]][7]
955 return i[d[6]][7], i[d[5]][7]
956 else:
956 else:
957 return i[d[5]][7], i[d[6]][7]
957 return i[d[5]][7], i[d[6]][7]
958
958
959 def chainlen(self, rev):
959 def chainlen(self, rev):
960 return self._chaininfo(rev)[0]
960 return self._chaininfo(rev)[0]
961
961
962 def _chaininfo(self, rev):
962 def _chaininfo(self, rev):
963 chaininfocache = self._chaininfocache
963 chaininfocache = self._chaininfocache
964 if rev in chaininfocache:
964 if rev in chaininfocache:
965 return chaininfocache[rev]
965 return chaininfocache[rev]
966 index = self.index
966 index = self.index
967 generaldelta = self._generaldelta
967 generaldelta = self._generaldelta
968 iterrev = rev
968 iterrev = rev
969 e = index[iterrev]
969 e = index[iterrev]
970 clen = 0
970 clen = 0
971 compresseddeltalen = 0
971 compresseddeltalen = 0
972 while iterrev != e[3]:
972 while iterrev != e[3]:
973 clen += 1
973 clen += 1
974 compresseddeltalen += e[1]
974 compresseddeltalen += e[1]
975 if generaldelta:
975 if generaldelta:
976 iterrev = e[3]
976 iterrev = e[3]
977 else:
977 else:
978 iterrev -= 1
978 iterrev -= 1
979 if iterrev in chaininfocache:
979 if iterrev in chaininfocache:
980 t = chaininfocache[iterrev]
980 t = chaininfocache[iterrev]
981 clen += t[0]
981 clen += t[0]
982 compresseddeltalen += t[1]
982 compresseddeltalen += t[1]
983 break
983 break
984 e = index[iterrev]
984 e = index[iterrev]
985 else:
985 else:
986 # Add text length of base since decompressing that also takes
986 # Add text length of base since decompressing that also takes
987 # work. For cache hits the length is already included.
987 # work. For cache hits the length is already included.
988 compresseddeltalen += e[1]
988 compresseddeltalen += e[1]
989 r = (clen, compresseddeltalen)
989 r = (clen, compresseddeltalen)
990 chaininfocache[rev] = r
990 chaininfocache[rev] = r
991 return r
991 return r
992
992
993 def _deltachain(self, rev, stoprev=None):
993 def _deltachain(self, rev, stoprev=None):
994 """Obtain the delta chain for a revision.
994 """Obtain the delta chain for a revision.
995
995
996 ``stoprev`` specifies a revision to stop at. If not specified, we
996 ``stoprev`` specifies a revision to stop at. If not specified, we
997 stop at the base of the chain.
997 stop at the base of the chain.
998
998
999 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
999 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
1000 revs in ascending order and ``stopped`` is a bool indicating whether
1000 revs in ascending order and ``stopped`` is a bool indicating whether
1001 ``stoprev`` was hit.
1001 ``stoprev`` was hit.
1002 """
1002 """
1003 # Try C implementation.
1003 # Try C implementation.
1004 try:
1004 try:
1005 return self.index.deltachain(rev, stoprev, self._generaldelta)
1005 return self.index.deltachain(rev, stoprev, self._generaldelta)
1006 except AttributeError:
1006 except AttributeError:
1007 pass
1007 pass
1008
1008
1009 chain = []
1009 chain = []
1010
1010
1011 # Alias to prevent attribute lookup in tight loop.
1011 # Alias to prevent attribute lookup in tight loop.
1012 index = self.index
1012 index = self.index
1013 generaldelta = self._generaldelta
1013 generaldelta = self._generaldelta
1014
1014
1015 iterrev = rev
1015 iterrev = rev
1016 e = index[iterrev]
1016 e = index[iterrev]
1017 while iterrev != e[3] and iterrev != stoprev:
1017 while iterrev != e[3] and iterrev != stoprev:
1018 chain.append(iterrev)
1018 chain.append(iterrev)
1019 if generaldelta:
1019 if generaldelta:
1020 iterrev = e[3]
1020 iterrev = e[3]
1021 else:
1021 else:
1022 iterrev -= 1
1022 iterrev -= 1
1023 e = index[iterrev]
1023 e = index[iterrev]
1024
1024
1025 if iterrev == stoprev:
1025 if iterrev == stoprev:
1026 stopped = True
1026 stopped = True
1027 else:
1027 else:
1028 chain.append(iterrev)
1028 chain.append(iterrev)
1029 stopped = False
1029 stopped = False
1030
1030
1031 chain.reverse()
1031 chain.reverse()
1032 return chain, stopped
1032 return chain, stopped
1033
1033
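    # Usage sketch (assumed revlog instance ``rlog``): with no
    # ``stoprev`` the chain always reaches the revision's chain base and
    # comes back in ascending order.
    #
    #     chain, stopped = rlog._deltachain(rev)
    #     assert not stopped
    #     assert chain == sorted(chain) and chain[0] == rlog.chainbase(rev)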
1034 def ancestors(self, revs, stoprev=0, inclusive=False):
1034 def ancestors(self, revs, stoprev=0, inclusive=False):
1035 """Generate the ancestors of 'revs' in reverse revision order.
1035 """Generate the ancestors of 'revs' in reverse revision order.
1036 Does not generate revs lower than stoprev.
1036 Does not generate revs lower than stoprev.
1037
1037
1038 See the documentation for ancestor.lazyancestors for more details."""
1038 See the documentation for ancestor.lazyancestors for more details."""
1039
1039
1040 # first, make sure start revisions aren't filtered
1040 # first, make sure start revisions aren't filtered
1041 revs = list(revs)
1041 revs = list(revs)
1042 checkrev = self.node
1042 checkrev = self.node
1043 for r in revs:
1043 for r in revs:
1044 checkrev(r)
1044 checkrev(r)
1045 # and then their ancestors are known to be unfiltered as well
1045 # and then their ancestors are known to be unfiltered as well
1046
1046
1047 if rustancestor is not None and self.index.rust_ext_compat:
1047 if rustancestor is not None and self.index.rust_ext_compat:
1048 lazyancestors = rustancestor.LazyAncestors
1048 lazyancestors = rustancestor.LazyAncestors
1049 arg = self.index
1049 arg = self.index
1050 else:
1050 else:
1051 lazyancestors = ancestor.lazyancestors
1051 lazyancestors = ancestor.lazyancestors
1052 arg = self._uncheckedparentrevs
1052 arg = self._uncheckedparentrevs
1053 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
1053 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
1054
1054
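    # Usage sketch (assumed ``rlog``; ``tiprev`` and ``cutoff`` are
    # placeholder revision numbers): revisions come back in reverse
    # revision order and never go below ``stoprev``.
    #
    #     for r in rlog.ancestors([tiprev], stoprev=cutoff):
    #         assert cutoff <= r < tiprev  # proper ancestors only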
1055 def descendants(self, revs):
1055 def descendants(self, revs):
1056 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
1056 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
1057
1057
1058 def findcommonmissing(self, common=None, heads=None):
1058 def findcommonmissing(self, common=None, heads=None):
1059 """Return a tuple of the ancestors of common and the ancestors of heads
1059 """Return a tuple of the ancestors of common and the ancestors of heads
1060 that are not ancestors of common. In revset terminology, we return the
1060 that are not ancestors of common. In revset terminology, we return the
1061 tuple:
1061 tuple:
1062
1062
1063 ::common, (::heads) - (::common)
1063 ::common, (::heads) - (::common)
1064
1064
1065 The list is sorted by revision number, meaning it is
1065 The list is sorted by revision number, meaning it is
1066 topologically sorted.
1066 topologically sorted.
1067
1067
1068 'heads' and 'common' are both lists of node IDs. If heads is
1068 'heads' and 'common' are both lists of node IDs. If heads is
1069 not supplied, uses all of the revlog's heads. If common is not
1069 not supplied, uses all of the revlog's heads. If common is not
1070 supplied, uses nullid."""
1070 supplied, uses nullid."""
1071 if common is None:
1071 if common is None:
1072 common = [self.nullid]
1072 common = [self.nullid]
1073 if heads is None:
1073 if heads is None:
1074 heads = self.heads()
1074 heads = self.heads()
1075
1075
1076 common = [self.rev(n) for n in common]
1076 common = [self.rev(n) for n in common]
1077 heads = [self.rev(n) for n in heads]
1077 heads = [self.rev(n) for n in heads]
1078
1078
1079 # we want the ancestors, but inclusive
1079 # we want the ancestors, but inclusive
1080 class lazyset:
1080 class lazyset:
1081 def __init__(self, lazyvalues):
1081 def __init__(self, lazyvalues):
1082 self.addedvalues = set()
1082 self.addedvalues = set()
1083 self.lazyvalues = lazyvalues
1083 self.lazyvalues = lazyvalues
1084
1084
1085 def __contains__(self, value):
1085 def __contains__(self, value):
1086 return value in self.addedvalues or value in self.lazyvalues
1086 return value in self.addedvalues or value in self.lazyvalues
1087
1087
1088 def __iter__(self):
1088 def __iter__(self):
1089 added = self.addedvalues
1089 added = self.addedvalues
1090 for r in added:
1090 for r in added:
1091 yield r
1091 yield r
1092 for r in self.lazyvalues:
1092 for r in self.lazyvalues:
1093 if r not in added:
1093 if r not in added:
1094 yield r
1094 yield r
1095
1095
1096 def add(self, value):
1096 def add(self, value):
1097 self.addedvalues.add(value)
1097 self.addedvalues.add(value)
1098
1098
1099 def update(self, values):
1099 def update(self, values):
1100 self.addedvalues.update(values)
1100 self.addedvalues.update(values)
1101
1101
1102 has = lazyset(self.ancestors(common))
1102 has = lazyset(self.ancestors(common))
1103 has.add(nullrev)
1103 has.add(nullrev)
1104 has.update(common)
1104 has.update(common)
1105
1105
1106 # take all ancestors from heads that aren't in has
1106 # take all ancestors from heads that aren't in has
1107 missing = set()
1107 missing = set()
1108 visit = collections.deque(r for r in heads if r not in has)
1108 visit = collections.deque(r for r in heads if r not in has)
1109 while visit:
1109 while visit:
1110 r = visit.popleft()
1110 r = visit.popleft()
1111 if r in missing:
1111 if r in missing:
1112 continue
1112 continue
1113 else:
1113 else:
1114 missing.add(r)
1114 missing.add(r)
1115 for p in self.parentrevs(r):
1115 for p in self.parentrevs(r):
1116 if p not in has:
1116 if p not in has:
1117 visit.append(p)
1117 visit.append(p)
1118 missing = list(missing)
1118 missing = list(missing)
1119 missing.sort()
1119 missing.sort()
1120 return has, [self.node(miss) for miss in missing]
1120 return has, [self.node(miss) for miss in missing]
1121
1121
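    # Usage sketch (assumed ``rlog`` plus node lists ``common`` and
    # ``heads``): the two return values are disjoint by construction.
    #
    #     has, missing = rlog.findcommonmissing(common, heads)
    #     assert all(rlog.rev(n) not in has for n in missing)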
1122 def incrementalmissingrevs(self, common=None):
1122 def incrementalmissingrevs(self, common=None):
1123 """Return an object that can be used to incrementally compute the
1123 """Return an object that can be used to incrementally compute the
1124 revision numbers of the ancestors of arbitrary sets that are not
1124 revision numbers of the ancestors of arbitrary sets that are not
1125 ancestors of common. This is an ancestor.incrementalmissingancestors
1125 ancestors of common. This is an ancestor.incrementalmissingancestors
1126 object.
1126 object.
1127
1127
1128 'common' is a list of revision numbers. If common is not supplied, uses
1128 'common' is a list of revision numbers. If common is not supplied, uses
1129 nullrev.
1129 nullrev.
1130 """
1130 """
1131 if common is None:
1131 if common is None:
1132 common = [nullrev]
1132 common = [nullrev]
1133
1133
1134 if rustancestor is not None and self.index.rust_ext_compat:
1134 if rustancestor is not None and self.index.rust_ext_compat:
1135 return rustancestor.MissingAncestors(self.index, common)
1135 return rustancestor.MissingAncestors(self.index, common)
1136 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1136 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1137
1137
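    # Usage sketch (assumed ``rlog`` and a revision ``somerev``): this is
    # the object that findmissingrevs() below queries.
    #
    #     inc = rlog.incrementalmissingrevs(common=[somerev])
    #     missing = inc.missingancestors(rlog.headrevs())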
1138 def findmissingrevs(self, common=None, heads=None):
1138 def findmissingrevs(self, common=None, heads=None):
1139 """Return the revision numbers of the ancestors of heads that
1139 """Return the revision numbers of the ancestors of heads that
1140 are not ancestors of common.
1140 are not ancestors of common.
1141
1141
1142 More specifically, return a list of revision numbers corresponding to
1142 More specifically, return a list of revision numbers corresponding to
1143 nodes N such that every N satisfies the following constraints:
1143 nodes N such that every N satisfies the following constraints:
1144
1144
1145 1. N is an ancestor of some node in 'heads'
1145 1. N is an ancestor of some node in 'heads'
1146 2. N is not an ancestor of any node in 'common'
1146 2. N is not an ancestor of any node in 'common'
1147
1147
1148 The list is sorted by revision number, meaning it is
1148 The list is sorted by revision number, meaning it is
1149 topologically sorted.
1149 topologically sorted.
1150
1150
1151 'heads' and 'common' are both lists of revision numbers. If heads is
1151 'heads' and 'common' are both lists of revision numbers. If heads is
1152 not supplied, uses all of the revlog's heads. If common is not
1152 not supplied, uses all of the revlog's heads. If common is not
1153 supplied, uses nullid."""
1153 supplied, uses nullid."""
1154 if common is None:
1154 if common is None:
1155 common = [nullrev]
1155 common = [nullrev]
1156 if heads is None:
1156 if heads is None:
1157 heads = self.headrevs()
1157 heads = self.headrevs()
1158
1158
1159 inc = self.incrementalmissingrevs(common=common)
1159 inc = self.incrementalmissingrevs(common=common)
1160 return inc.missingancestors(heads)
1160 return inc.missingancestors(heads)
1161
1161
1162 def findmissing(self, common=None, heads=None):
1162 def findmissing(self, common=None, heads=None):
1163 """Return the ancestors of heads that are not ancestors of common.
1163 """Return the ancestors of heads that are not ancestors of common.
1164
1164
1165 More specifically, return a list of nodes N such that every N
1165 More specifically, return a list of nodes N such that every N
1166 satisfies the following constraints:
1166 satisfies the following constraints:
1167
1167
1168 1. N is an ancestor of some node in 'heads'
1168 1. N is an ancestor of some node in 'heads'
1169 2. N is not an ancestor of any node in 'common'
1169 2. N is not an ancestor of any node in 'common'
1170
1170
1171 The list is sorted by revision number, meaning it is
1171 The list is sorted by revision number, meaning it is
1172 topologically sorted.
1172 topologically sorted.
1173
1173
1174 'heads' and 'common' are both lists of node IDs. If heads is
1174 'heads' and 'common' are both lists of node IDs. If heads is
1175 not supplied, uses all of the revlog's heads. If common is not
1175 not supplied, uses all of the revlog's heads. If common is not
1176 supplied, uses nullid."""
1176 supplied, uses nullid."""
1177 if common is None:
1177 if common is None:
1178 common = [self.nullid]
1178 common = [self.nullid]
1179 if heads is None:
1179 if heads is None:
1180 heads = self.heads()
1180 heads = self.heads()
1181
1181
1182 common = [self.rev(n) for n in common]
1182 common = [self.rev(n) for n in common]
1183 heads = [self.rev(n) for n in heads]
1183 heads = [self.rev(n) for n in heads]
1184
1184
1185 inc = self.incrementalmissingrevs(common=common)
1185 inc = self.incrementalmissingrevs(common=common)
1186 return [self.node(r) for r in inc.missingancestors(heads)]
1186 return [self.node(r) for r in inc.missingancestors(heads)]
1187
1187
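    # Usage sketch (assumed ``rlog``, nodes ``cn`` and ``hn``):
    # findmissing() is the node-level twin of findmissingrevs().
    #
    #     byrev = rlog.findmissingrevs([rlog.rev(cn)], [rlog.rev(hn)])
    #     assert rlog.findmissing([cn], [hn]) == [rlog.node(r) for r in byrev]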
1188 def nodesbetween(self, roots=None, heads=None):
1188 def nodesbetween(self, roots=None, heads=None):
1189 """Return a topological path from 'roots' to 'heads'.
1189 """Return a topological path from 'roots' to 'heads'.
1190
1190
1191 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1191 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1192 topologically sorted list of all nodes N that satisfy both of
1192 topologically sorted list of all nodes N that satisfy both of
1193 these constraints:
1193 these constraints:
1194
1194
1195 1. N is a descendant of some node in 'roots'
1195 1. N is a descendant of some node in 'roots'
1196 2. N is an ancestor of some node in 'heads'
1196 2. N is an ancestor of some node in 'heads'
1197
1197
1198 Every node is considered to be both a descendant and an ancestor
1198 Every node is considered to be both a descendant and an ancestor
1199 of itself, so every reachable node in 'roots' and 'heads' will be
1199 of itself, so every reachable node in 'roots' and 'heads' will be
1200 included in 'nodes'.
1200 included in 'nodes'.
1201
1201
1202 'outroots' is the list of reachable nodes in 'roots', i.e., the
1202 'outroots' is the list of reachable nodes in 'roots', i.e., the
1203 subset of 'roots' that is returned in 'nodes'. Likewise,
1203 subset of 'roots' that is returned in 'nodes'. Likewise,
1204 'outheads' is the subset of 'heads' that is also in 'nodes'.
1204 'outheads' is the subset of 'heads' that is also in 'nodes'.
1205
1205
1206 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1206 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1207 unspecified, uses nullid as the only root. If 'heads' is
1207 unspecified, uses nullid as the only root. If 'heads' is
1208 unspecified, uses the list of all of the revlog's heads."""
1208 unspecified, uses the list of all of the revlog's heads."""
1209 nonodes = ([], [], [])
1209 nonodes = ([], [], [])
1210 if roots is not None:
1210 if roots is not None:
1211 roots = list(roots)
1211 roots = list(roots)
1212 if not roots:
1212 if not roots:
1213 return nonodes
1213 return nonodes
1214 lowestrev = min([self.rev(n) for n in roots])
1214 lowestrev = min([self.rev(n) for n in roots])
1215 else:
1215 else:
1216 roots = [self.nullid] # Everybody's a descendant of nullid
1216 roots = [self.nullid] # Everybody's a descendant of nullid
1217 lowestrev = nullrev
1217 lowestrev = nullrev
1218 if (lowestrev == nullrev) and (heads is None):
1218 if (lowestrev == nullrev) and (heads is None):
1219 # We want _all_ the nodes!
1219 # We want _all_ the nodes!
1220 return (
1220 return (
1221 [self.node(r) for r in self],
1221 [self.node(r) for r in self],
1222 [self.nullid],
1222 [self.nullid],
1223 list(self.heads()),
1223 list(self.heads()),
1224 )
1224 )
1225 if heads is None:
1225 if heads is None:
1226 # All nodes are ancestors, so the latest ancestor is the last
1226 # All nodes are ancestors, so the latest ancestor is the last
1227 # node.
1227 # node.
1228 highestrev = len(self) - 1
1228 highestrev = len(self) - 1
1229 # Set ancestors to None to signal that every node is an ancestor.
1229 # Set ancestors to None to signal that every node is an ancestor.
1230 ancestors = None
1230 ancestors = None
1231 # Set heads to an empty dictionary for later discovery of heads
1231 # Set heads to an empty dictionary for later discovery of heads
1232 heads = {}
1232 heads = {}
1233 else:
1233 else:
1234 heads = list(heads)
1234 heads = list(heads)
1235 if not heads:
1235 if not heads:
1236 return nonodes
1236 return nonodes
1237 ancestors = set()
1237 ancestors = set()
1238 # Turn heads into a dictionary so we can remove 'fake' heads.
1238 # Turn heads into a dictionary so we can remove 'fake' heads.
1239 # Also, later we will be using it to filter out the heads we can't
1239 # Also, later we will be using it to filter out the heads we can't
1240 # find from roots.
1240 # find from roots.
1241 heads = dict.fromkeys(heads, False)
1241 heads = dict.fromkeys(heads, False)
1242 # Start at the top and keep marking parents until we're done.
1242 # Start at the top and keep marking parents until we're done.
1243 nodestotag = set(heads)
1243 nodestotag = set(heads)
1244 # Remember where the top was so we can use it as a limit later.
1244 # Remember where the top was so we can use it as a limit later.
1245 highestrev = max([self.rev(n) for n in nodestotag])
1245 highestrev = max([self.rev(n) for n in nodestotag])
1246 while nodestotag:
1246 while nodestotag:
1247 # grab a node to tag
1247 # grab a node to tag
1248 n = nodestotag.pop()
1248 n = nodestotag.pop()
1249 # Never tag nullid
1249 # Never tag nullid
1250 if n == self.nullid:
1250 if n == self.nullid:
1251 continue
1251 continue
1252 # A node's revision number represents its place in a
1252 # A node's revision number represents its place in a
1253 # topologically sorted list of nodes.
1253 # topologically sorted list of nodes.
1254 r = self.rev(n)
1254 r = self.rev(n)
1255 if r >= lowestrev:
1255 if r >= lowestrev:
1256 if n not in ancestors:
1256 if n not in ancestors:
1257 # If we are possibly a descendant of one of the roots
1257 # If we are possibly a descendant of one of the roots
1258 # and we haven't already been marked as an ancestor
1258 # and we haven't already been marked as an ancestor
1259 ancestors.add(n) # Mark as ancestor
1259 ancestors.add(n) # Mark as ancestor
1260 # Add non-nullid parents to list of nodes to tag.
1260 # Add non-nullid parents to list of nodes to tag.
1261 nodestotag.update(
1261 nodestotag.update(
1262 [p for p in self.parents(n) if p != self.nullid]
1262 [p for p in self.parents(n) if p != self.nullid]
1263 )
1263 )
1264 elif n in heads: # We've seen it before, is it a fake head?
1264 elif n in heads: # We've seen it before, is it a fake head?
1265 # So it is; real heads should not be the ancestors of
1265 # So it is; real heads should not be the ancestors of
1266 # any other heads.
1266 # any other heads.
1267 heads.pop(n)
1267 heads.pop(n)
1268 if not ancestors:
1268 if not ancestors:
1269 return nonodes
1269 return nonodes
1270 # Now that we have our set of ancestors, we want to remove any
1270 # Now that we have our set of ancestors, we want to remove any
1271 # roots that are not ancestors.
1271 # roots that are not ancestors.
1272
1272
1273 # If one of the roots was nullid, everything is included anyway.
1273 # If one of the roots was nullid, everything is included anyway.
1274 if lowestrev > nullrev:
1274 if lowestrev > nullrev:
1275 # But, since we weren't, let's recompute the lowest rev to not
1275 # But, since we weren't, let's recompute the lowest rev to not
1276 # include roots that aren't ancestors.
1276 # include roots that aren't ancestors.
1277
1277
1278 # Filter out roots that aren't ancestors of heads
1278 # Filter out roots that aren't ancestors of heads
1279 roots = [root for root in roots if root in ancestors]
1279 roots = [root for root in roots if root in ancestors]
1280 # Recompute the lowest revision
1280 # Recompute the lowest revision
1281 if roots:
1281 if roots:
1282 lowestrev = min([self.rev(root) for root in roots])
1282 lowestrev = min([self.rev(root) for root in roots])
1283 else:
1283 else:
1284 # No more roots? Return empty list
1284 # No more roots? Return empty list
1285 return nonodes
1285 return nonodes
1286 else:
1286 else:
1287 # We are descending from nullid, and don't need to care about
1287 # We are descending from nullid, and don't need to care about
1288 # any other roots.
1288 # any other roots.
1289 lowestrev = nullrev
1289 lowestrev = nullrev
1290 roots = [self.nullid]
1290 roots = [self.nullid]
1291 # Transform our roots list into a set.
1291 # Transform our roots list into a set.
1292 descendants = set(roots)
1292 descendants = set(roots)
1293 # Also, keep the original roots so we can filter out roots that aren't
1293 # Also, keep the original roots so we can filter out roots that aren't
1294 # 'real' roots (i.e. are descended from other roots).
1294 # 'real' roots (i.e. are descended from other roots).
1295 roots = descendants.copy()
1295 roots = descendants.copy()
1296 # Our topologically sorted list of output nodes.
1296 # Our topologically sorted list of output nodes.
1297 orderedout = []
1297 orderedout = []
1298 # Don't start at nullid since we don't want nullid in our output list,
1298 # Don't start at nullid since we don't want nullid in our output list,
1299 # and if nullid shows up in descendants, empty parents will look like
1299 # and if nullid shows up in descendants, empty parents will look like
1300 # they're descendants.
1300 # they're descendants.
1301 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1301 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1302 n = self.node(r)
1302 n = self.node(r)
1303 isdescendant = False
1303 isdescendant = False
1304 if lowestrev == nullrev: # Everybody is a descendant of nullid
1304 if lowestrev == nullrev: # Everybody is a descendant of nullid
1305 isdescendant = True
1305 isdescendant = True
1306 elif n in descendants:
1306 elif n in descendants:
1307 # n is already a descendant
1307 # n is already a descendant
1308 isdescendant = True
1308 isdescendant = True
1309 # This check only needs to be done here because all the roots
1309 # This check only needs to be done here because all the roots
1310 # will start being marked as descendants before the loop.
1310 # will start being marked as descendants before the loop.
1311 if n in roots:
1311 if n in roots:
1312 # If n was a root, check if it's a 'real' root.
1312 # If n was a root, check if it's a 'real' root.
1313 p = tuple(self.parents(n))
1313 p = tuple(self.parents(n))
1314 # If any of its parents are descendants, it's not a root.
1314 # If any of its parents are descendants, it's not a root.
1315 if (p[0] in descendants) or (p[1] in descendants):
1315 if (p[0] in descendants) or (p[1] in descendants):
1316 roots.remove(n)
1316 roots.remove(n)
1317 else:
1317 else:
1318 p = tuple(self.parents(n))
1318 p = tuple(self.parents(n))
1319 # A node is a descendant if either of its parents are
1319 # A node is a descendant if either of its parents are
1320 # descendants. (We seeded the descendants set with the roots
1320 # descendants. (We seeded the descendants set with the roots
1321 # up there, remember?)
1321 # up there, remember?)
1322 if (p[0] in descendants) or (p[1] in descendants):
1322 if (p[0] in descendants) or (p[1] in descendants):
1323 descendants.add(n)
1323 descendants.add(n)
1324 isdescendant = True
1324 isdescendant = True
1325 if isdescendant and ((ancestors is None) or (n in ancestors)):
1325 if isdescendant and ((ancestors is None) or (n in ancestors)):
1326 # Only include nodes that are both descendants and ancestors.
1326 # Only include nodes that are both descendants and ancestors.
1327 orderedout.append(n)
1327 orderedout.append(n)
1328 if (ancestors is not None) and (n in heads):
1328 if (ancestors is not None) and (n in heads):
1329 # We're trying to figure out which heads are reachable
1329 # We're trying to figure out which heads are reachable
1330 # from roots.
1330 # from roots.
1331 # Mark this head as having been reached
1331 # Mark this head as having been reached
1332 heads[n] = True
1332 heads[n] = True
1333 elif ancestors is None:
1333 elif ancestors is None:
1334 # Otherwise, we're trying to discover the heads.
1334 # Otherwise, we're trying to discover the heads.
1335 # Assume this is a head because if it isn't, the next step
1335 # Assume this is a head because if it isn't, the next step
1336 # will eventually remove it.
1336 # will eventually remove it.
1337 heads[n] = True
1337 heads[n] = True
1338 # But, obviously its parents aren't.
1338 # But, obviously its parents aren't.
1339 for p in self.parents(n):
1339 for p in self.parents(n):
1340 heads.pop(p, None)
1340 heads.pop(p, None)
1341 heads = [head for head, flag in heads.items() if flag]
1341 heads = [head for head, flag in heads.items() if flag]
1342 roots = list(roots)
1342 roots = list(roots)
1343 assert orderedout
1343 assert orderedout
1344 assert roots
1344 assert roots
1345 assert heads
1345 assert heads
1346 return (orderedout, roots, heads)
1346 return (orderedout, roots, heads)
1347
1347
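    # Usage sketch (assumed ``rlog``): with no arguments the whole graph
    # is returned.
    #
    #     nodes, outroots, outheads = rlog.nodesbetween()
    #     assert outroots == [rlog.nullid]
    #     assert outheads == list(rlog.heads())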
1348 def headrevs(self, revs=None):
1348 def headrevs(self, revs=None):
1349 if revs is None:
1349 if revs is None:
1350 try:
1350 try:
1351 return self.index.headrevs()
1351 return self.index.headrevs()
1352 except AttributeError:
1352 except AttributeError:
1353 return self._headrevs()
1353 return self._headrevs()
1354 if rustdagop is not None and self.index.rust_ext_compat:
1354 if rustdagop is not None and self.index.rust_ext_compat:
1355 return rustdagop.headrevs(self.index, revs)
1355 return rustdagop.headrevs(self.index, revs)
1356 return dagop.headrevs(revs, self._uncheckedparentrevs)
1356 return dagop.headrevs(revs, self._uncheckedparentrevs)
1357
1357
1358 def computephases(self, roots):
1358 def computephases(self, roots):
1359 return self.index.computephasesmapsets(roots)
1359 return self.index.computephasesmapsets(roots)
1360
1360
1361 def _headrevs(self):
1361 def _headrevs(self):
1362 count = len(self)
1362 count = len(self)
1363 if not count:
1363 if not count:
1364 return [nullrev]
1364 return [nullrev]
1365 # we won't iterate over filtered revs, so nobody is a head at the start
1365 # we won't iterate over filtered revs, so nobody is a head at the start
1366 ishead = [0] * (count + 1)
1366 ishead = [0] * (count + 1)
1367 index = self.index
1367 index = self.index
1368 for r in self:
1368 for r in self:
1369 ishead[r] = 1 # I may be a head
1369 ishead[r] = 1 # I may be a head
1370 e = index[r]
1370 e = index[r]
1371 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1371 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1372 return [r for r, val in enumerate(ishead) if val]
1372 return [r for r, val in enumerate(ishead) if val]
1373
1373
1374 def heads(self, start=None, stop=None):
1374 def heads(self, start=None, stop=None):
1375 """return the list of all nodes that have no children
1375 """return the list of all nodes that have no children
1376
1376
1377 if start is specified, only heads that are descendants of
1377 if start is specified, only heads that are descendants of
1378 start will be returned
1378 start will be returned
1379 if stop is specified, it will consider all the revs from stop
1379 if stop is specified, it will consider all the revs from stop
1380 as if they had no children
1380 as if they had no children
1381 """
1381 """
1382 if start is None and stop is None:
1382 if start is None and stop is None:
1383 if not len(self):
1383 if not len(self):
1384 return [self.nullid]
1384 return [self.nullid]
1385 return [self.node(r) for r in self.headrevs()]
1385 return [self.node(r) for r in self.headrevs()]
1386
1386
1387 if start is None:
1387 if start is None:
1388 start = nullrev
1388 start = nullrev
1389 else:
1389 else:
1390 start = self.rev(start)
1390 start = self.rev(start)
1391
1391
1392 stoprevs = {self.rev(n) for n in stop or []}
1392 stoprevs = {self.rev(n) for n in stop or []}
1393
1393
1394 revs = dagop.headrevssubset(
1394 revs = dagop.headrevssubset(
1395 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1395 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1396 )
1396 )
1397
1397
1398 return [self.node(rev) for rev in revs]
1398 return [self.node(rev) for rev in revs]
1399
1399
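    # Usage sketch (assumed ``rlog`` and some node ``n``): heads limited
    # to the descendants of ``n`` are a subset of the global heads, since
    # any child of a descendant of ``n`` is itself a descendant of ``n``.
    #
    #     assert set(rlog.heads(start=n)) <= set(rlog.heads())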
1400 def children(self, node):
1400 def children(self, node):
1401 """find the children of a given node"""
1401 """find the children of a given node"""
1402 c = []
1402 c = []
1403 p = self.rev(node)
1403 p = self.rev(node)
1404 for r in self.revs(start=p + 1):
1404 for r in self.revs(start=p + 1):
1405 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1405 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1406 if prevs:
1406 if prevs:
1407 for pr in prevs:
1407 for pr in prevs:
1408 if pr == p:
1408 if pr == p:
1409 c.append(self.node(r))
1409 c.append(self.node(r))
1410 elif p == nullrev:
1410 elif p == nullrev:
1411 c.append(self.node(r))
1411 c.append(self.node(r))
1412 return c
1412 return c
1413
1413
1414 def commonancestorsheads(self, a, b):
1414 def commonancestorsheads(self, a, b):
1415 """calculate all the heads of the common ancestors of nodes a and b"""
1415 """calculate all the heads of the common ancestors of nodes a and b"""
1416 a, b = self.rev(a), self.rev(b)
1416 a, b = self.rev(a), self.rev(b)
1417 ancs = self._commonancestorsheads(a, b)
1417 ancs = self._commonancestorsheads(a, b)
1418 return pycompat.maplist(self.node, ancs)
1418 return pycompat.maplist(self.node, ancs)
1419
1419
1420 def _commonancestorsheads(self, *revs):
1420 def _commonancestorsheads(self, *revs):
1421 """calculate all the heads of the common ancestors of revs"""
1421 """calculate all the heads of the common ancestors of revs"""
1422 try:
1422 try:
1423 ancs = self.index.commonancestorsheads(*revs)
1423 ancs = self.index.commonancestorsheads(*revs)
1424 except (AttributeError, OverflowError): # C implementation failed
1424 except (AttributeError, OverflowError): # C implementation failed
1425 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1425 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1426 return ancs
1426 return ancs
1427
1427
1428 def isancestor(self, a, b):
1428 def isancestor(self, a, b):
1429 """return True if node a is an ancestor of node b
1429 """return True if node a is an ancestor of node b
1430
1430
1431 A revision is considered an ancestor of itself."""
1431 A revision is considered an ancestor of itself."""
1432 a, b = self.rev(a), self.rev(b)
1432 a, b = self.rev(a), self.rev(b)
1433 return self.isancestorrev(a, b)
1433 return self.isancestorrev(a, b)
1434
1434
1435 def isancestorrev(self, a, b):
1435 def isancestorrev(self, a, b):
1436 """return True if revision a is an ancestor of revision b
1436 """return True if revision a is an ancestor of revision b
1437
1437
1438 A revision is considered an ancestor of itself.
1438 A revision is considered an ancestor of itself.
1439
1439
1440 The implementation of this is trivial but the use of
1440 The implementation of this is trivial but the use of
1441 reachableroots is not."""
1441 reachableroots is not."""
1442 if a == nullrev:
1442 if a == nullrev:
1443 return True
1443 return True
1444 elif a == b:
1444 elif a == b:
1445 return True
1445 return True
1446 elif a > b:
1446 elif a > b:
1447 return False
1447 return False
1448 return bool(self.reachableroots(a, [b], [a], includepath=False))
1448 return bool(self.reachableroots(a, [b], [a], includepath=False))
1449
1449
1450 def reachableroots(self, minroot, heads, roots, includepath=False):
1450 def reachableroots(self, minroot, heads, roots, includepath=False):
1451 """return (heads(::(<roots> and <roots>::<heads>)))
1451 """return (heads(::(<roots> and <roots>::<heads>)))
1452
1452
1453 If includepath is True, return (<roots>::<heads>)."""
1453 If includepath is True, return (<roots>::<heads>)."""
1454 try:
1454 try:
1455 return self.index.reachableroots2(
1455 return self.index.reachableroots2(
1456 minroot, heads, roots, includepath
1456 minroot, heads, roots, includepath
1457 )
1457 )
1458 except AttributeError:
1458 except AttributeError:
1459 return dagop._reachablerootspure(
1459 return dagop._reachablerootspure(
1460 self.parentrevs, minroot, roots, heads, includepath
1460 self.parentrevs, minroot, roots, heads, includepath
1461 )
1461 )
1462
1462
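    # Usage sketch (assumed ``rlog`` and revisions ``a`` < ``b``, both
    # non-null): the ancestry test above, phrased directly as a
    # reachableroots() query.
    #
    #     reachable = rlog.reachableroots(a, [b], [a], includepath=False)
    #     assert bool(reachable) == rlog.isancestorrev(a, b)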
1463 def ancestor(self, a, b):
1463 def ancestor(self, a, b):
1464 """calculate the "best" common ancestor of nodes a and b"""
1464 """calculate the "best" common ancestor of nodes a and b"""
1465
1465
1466 a, b = self.rev(a), self.rev(b)
1466 a, b = self.rev(a), self.rev(b)
1467 try:
1467 try:
1468 ancs = self.index.ancestors(a, b)
1468 ancs = self.index.ancestors(a, b)
1469 except (AttributeError, OverflowError):
1469 except (AttributeError, OverflowError):
1470 ancs = ancestor.ancestors(self.parentrevs, a, b)
1470 ancs = ancestor.ancestors(self.parentrevs, a, b)
1471 if ancs:
1471 if ancs:
1472 # choose a consistent winner when there's a tie
1472 # choose a consistent winner when there's a tie
1473 return min(map(self.node, ancs))
1473 return min(map(self.node, ancs))
1474 return self.nullid
1474 return self.nullid
1475
1475
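    # Usage sketch (assumed ``rlog``, nodes ``n1`` and ``n2``): the
    # "best" ancestor is always one of the common-ancestor heads, or
    # nullid when there is none.
    #
    #     gca = rlog.ancestor(n1, n2)
    #     assert gca == rlog.nullid or gca in rlog.commonancestorsheads(n1, n2)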
1476 def _match(self, id):
1476 def _match(self, id):
1477 if isinstance(id, int):
1477 if isinstance(id, int):
1478 # rev
1478 # rev
1479 return self.node(id)
1479 return self.node(id)
1480 if len(id) == self.nodeconstants.nodelen:
1480 if len(id) == self.nodeconstants.nodelen:
1481 # possibly a binary node
1481 # possibly a binary node
1482 # odds of a binary node being all hex in ASCII are 1 in 10**25
1482 # odds of a binary node being all hex in ASCII are 1 in 10**25
1483 try:
1483 try:
1484 node = id
1484 node = id
1485 self.rev(node) # quick search the index
1485 self.rev(node) # quick search the index
1486 return node
1486 return node
1487 except error.LookupError:
1487 except error.LookupError:
1488 pass # may be partial hex id
1488 pass # may be partial hex id
1489 try:
1489 try:
1490 # str(rev)
1490 # str(rev)
1491 rev = int(id)
1491 rev = int(id)
1492 if b"%d" % rev != id:
1492 if b"%d" % rev != id:
1493 raise ValueError
1493 raise ValueError
1494 if rev < 0:
1494 if rev < 0:
1495 rev = len(self) + rev
1495 rev = len(self) + rev
1496 if rev < 0 or rev >= len(self):
1496 if rev < 0 or rev >= len(self):
1497 raise ValueError
1497 raise ValueError
1498 return self.node(rev)
1498 return self.node(rev)
1499 except (ValueError, OverflowError):
1499 except (ValueError, OverflowError):
1500 pass
1500 pass
1501 if len(id) == 2 * self.nodeconstants.nodelen:
1501 if len(id) == 2 * self.nodeconstants.nodelen:
1502 try:
1502 try:
1503 # a full hex nodeid?
1503 # a full hex nodeid?
1504 node = bin(id)
1504 node = bin(id)
1505 self.rev(node)
1505 self.rev(node)
1506 return node
1506 return node
1507 except (binascii.Error, error.LookupError):
1507 except (binascii.Error, error.LookupError):
1508 pass
1508 pass
1509
1509
1510 def _partialmatch(self, id):
1510 def _partialmatch(self, id):
1511 # we don't care about wdirfilenodeids as they should always be full hashes
1511 # we don't care about wdirfilenodeids as they should always be full hashes
1512 maybewdir = self.nodeconstants.wdirhex.startswith(id)
1512 maybewdir = self.nodeconstants.wdirhex.startswith(id)
1513 ambiguous = False
1513 ambiguous = False
1514 try:
1514 try:
1515 partial = self.index.partialmatch(id)
1515 partial = self.index.partialmatch(id)
1516 if partial and self.hasnode(partial):
1516 if partial and self.hasnode(partial):
1517 if maybewdir:
1517 if maybewdir:
1518 # single 'ff...' match in radix tree, ambiguous with wdir
1518 # single 'ff...' match in radix tree, ambiguous with wdir
1519 ambiguous = True
1519 ambiguous = True
1520 else:
1520 else:
1521 return partial
1521 return partial
1522 elif maybewdir:
1522 elif maybewdir:
1523 # no 'ff...' match in radix tree, wdir identified
1523 # no 'ff...' match in radix tree, wdir identified
1524 raise error.WdirUnsupported
1524 raise error.WdirUnsupported
1525 else:
1525 else:
1526 return None
1526 return None
1527 except error.RevlogError:
1527 except error.RevlogError:
1528 # parsers.c radix tree lookup gave multiple matches
1528 # parsers.c radix tree lookup gave multiple matches
1529 # fast path: for unfiltered changelog, radix tree is accurate
1529 # fast path: for unfiltered changelog, radix tree is accurate
1530 if not getattr(self, 'filteredrevs', None):
1530 if not getattr(self, 'filteredrevs', None):
1531 ambiguous = True
1531 ambiguous = True
1532 # fall through to slow path that filters hidden revisions
1532 # fall through to slow path that filters hidden revisions
1533 except (AttributeError, ValueError):
1533 except (AttributeError, ValueError):
1534 # we are pure python, or key is not hex
1534 # we are pure python, or key is not hex
1535 pass
1535 pass
1536 if ambiguous:
1536 if ambiguous:
1537 raise error.AmbiguousPrefixLookupError(
1537 raise error.AmbiguousPrefixLookupError(
1538 id, self.display_id, _(b'ambiguous identifier')
1538 id, self.display_id, _(b'ambiguous identifier')
1539 )
1539 )
1540
1540
1541 if id in self._pcache:
1541 if id in self._pcache:
1542 return self._pcache[id]
1542 return self._pcache[id]
1543
1543
1544 if len(id) <= 40:
1544 if len(id) <= 40:
1545 # hex(node)[:...]
1545 # hex(node)[:...]
1546 l = len(id) // 2 * 2 # grab an even number of digits
1546 l = len(id) // 2 * 2 # grab an even number of digits
1547 try:
1547 try:
1548 # we're dropping the last digit, so let's check that it's hex,
1548 # we're dropping the last digit, so let's check that it's hex,
1549 # to avoid the expensive computation below if it's not
1549 # to avoid the expensive computation below if it's not
1550 if len(id) % 2 > 0:
1550 if len(id) % 2 > 0:
1551 if not (id[-1] in hexdigits):
1551 if not (id[-1] in hexdigits):
1552 return None
1552 return None
1553 prefix = bin(id[:l])
1553 prefix = bin(id[:l])
1554 except binascii.Error:
1554 except binascii.Error:
1555 pass
1555 pass
1556 else:
1556 else:
1557 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1557 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1558 nl = [
1558 nl = [
1559 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1559 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1560 ]
1560 ]
1561 if self.nodeconstants.nullhex.startswith(id):
1561 if self.nodeconstants.nullhex.startswith(id):
1562 nl.append(self.nullid)
1562 nl.append(self.nullid)
1563 if len(nl) > 0:
1563 if len(nl) > 0:
1564 if len(nl) == 1 and not maybewdir:
1564 if len(nl) == 1 and not maybewdir:
1565 self._pcache[id] = nl[0]
1565 self._pcache[id] = nl[0]
1566 return nl[0]
1566 return nl[0]
1567 raise error.AmbiguousPrefixLookupError(
1567 raise error.AmbiguousPrefixLookupError(
1568 id, self.display_id, _(b'ambiguous identifier')
1568 id, self.display_id, _(b'ambiguous identifier')
1569 )
1569 )
1570 if maybewdir:
1570 if maybewdir:
1571 raise error.WdirUnsupported
1571 raise error.WdirUnsupported
1572 return None
1572 return None
1573
1573
1574 def lookup(self, id):
1574 def lookup(self, id):
1575 """locate a node based on:
1575 """locate a node based on:
1576 - revision number or str(revision number)
1576 - revision number or str(revision number)
1577 - nodeid or subset of hex nodeid
1577 - nodeid or subset of hex nodeid
1578 """
1578 """
1579 n = self._match(id)
1579 n = self._match(id)
1580 if n is not None:
1580 if n is not None:
1581 return n
1581 return n
1582 n = self._partialmatch(id)
1582 n = self._partialmatch(id)
1583 if n:
1583 if n:
1584 return n
1584 return n
1585
1585
1586 raise error.LookupError(id, self.display_id, _(b'no match found'))
1586 raise error.LookupError(id, self.display_id, _(b'no match found'))
1587
1587
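    # Usage sketch (assumed ``rlog``): the accepted identifier forms all
    # resolve to the same node.
    #
    #     n = rlog.lookup(b'0')            # revision number, as bytes
    #     assert n == rlog.lookup(hex(n))  # full hex nodeid
    #     assert n == rlog.lookup(n)       # binary nodeid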
1588 def shortest(self, node, minlength=1):
1588 def shortest(self, node, minlength=1):
1589 """Find the shortest unambiguous prefix that matches node."""
1589 """Find the shortest unambiguous prefix that matches node."""
1590
1590
1591 def isvalid(prefix):
1591 def isvalid(prefix):
1592 try:
1592 try:
1593 matchednode = self._partialmatch(prefix)
1593 matchednode = self._partialmatch(prefix)
1594 except error.AmbiguousPrefixLookupError:
1594 except error.AmbiguousPrefixLookupError:
1595 return False
1595 return False
1596 except error.WdirUnsupported:
1596 except error.WdirUnsupported:
1597 # single 'ff...' match
1597 # single 'ff...' match
1598 return True
1598 return True
1599 if matchednode is None:
1599 if matchednode is None:
1600 raise error.LookupError(node, self.display_id, _(b'no node'))
1600 raise error.LookupError(node, self.display_id, _(b'no node'))
1601 return True
1601 return True
1602
1602
1603 def maybewdir(prefix):
1603 def maybewdir(prefix):
1604 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1604 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1605
1605
1606 hexnode = hex(node)
1606 hexnode = hex(node)
1607
1607
1608 def disambiguate(hexnode, minlength):
1608 def disambiguate(hexnode, minlength):
1609 """Disambiguate against wdirid."""
1609 """Disambiguate against wdirid."""
1610 for length in range(minlength, len(hexnode) + 1):
1610 for length in range(minlength, len(hexnode) + 1):
1611 prefix = hexnode[:length]
1611 prefix = hexnode[:length]
1612 if not maybewdir(prefix):
1612 if not maybewdir(prefix):
1613 return prefix
1613 return prefix
1614
1614
1615 if not getattr(self, 'filteredrevs', None):
1615 if not getattr(self, 'filteredrevs', None):
1616 try:
1616 try:
1617 length = max(self.index.shortest(node), minlength)
1617 length = max(self.index.shortest(node), minlength)
1618 return disambiguate(hexnode, length)
1618 return disambiguate(hexnode, length)
1619 except error.RevlogError:
1619 except error.RevlogError:
1620 if node != self.nodeconstants.wdirid:
1620 if node != self.nodeconstants.wdirid:
1621 raise error.LookupError(
1621 raise error.LookupError(
1622 node, self.display_id, _(b'no node')
1622 node, self.display_id, _(b'no node')
1623 )
1623 )
1624 except AttributeError:
1624 except AttributeError:
1625 # Fall through to pure code
1625 # Fall through to pure code
1626 pass
1626 pass
1627
1627
1628 if node == self.nodeconstants.wdirid:
1628 if node == self.nodeconstants.wdirid:
1629 for length in range(minlength, len(hexnode) + 1):
1629 for length in range(minlength, len(hexnode) + 1):
1630 prefix = hexnode[:length]
1630 prefix = hexnode[:length]
1631 if isvalid(prefix):
1631 if isvalid(prefix):
1632 return prefix
1632 return prefix
1633
1633
1634 for length in range(minlength, len(hexnode) + 1):
1634 for length in range(minlength, len(hexnode) + 1):
1635 prefix = hexnode[:length]
1635 prefix = hexnode[:length]
1636 if isvalid(prefix):
1636 if isvalid(prefix):
1637 return disambiguate(hexnode, length)
1637 return disambiguate(hexnode, length)
1638
1638
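    # Usage sketch (assumed ``rlog`` and a known ``node``): the returned
    # prefix is unambiguous, so _partialmatch() above resolves it back to
    # the same node.
    #
    #     prefix = rlog.shortest(node)
    #     assert rlog._partialmatch(prefix) == node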
1639 def cmp(self, node, text):
1639 def cmp(self, node, text):
1640 """compare text with a given file revision
1640 """compare text with a given file revision
1641
1641
1642 returns True if text is different from what is stored.
1642 returns True if text is different from what is stored.
1643 """
1643 """
1644 p1, p2 = self.parents(node)
1644 p1, p2 = self.parents(node)
1645 return storageutil.hashrevisionsha1(text, p1, p2) != node
1645 return storageutil.hashrevisionsha1(text, p1, p2) != node
1646
1646
1647 def _getsegmentforrevs(self, startrev, endrev, df=None):
1647 def _getsegmentforrevs(self, startrev, endrev, df=None):
1648 """Obtain a segment of raw data corresponding to a range of revisions.
1648 """Obtain a segment of raw data corresponding to a range of revisions.
1649
1649
1650 Accepts the start and end revisions and an optional already-open
1650 Accepts the start and end revisions and an optional already-open
1651 file handle to be used for reading. If the file handle is read, its
1651 file handle to be used for reading. If the file handle is read, its
1652 seek position will not be preserved.
1652 seek position will not be preserved.
1653
1653
1654 Requests for data may be satisfied by a cache.
1654 Requests for data may be satisfied by a cache.
1655
1655
1656 Returns a 2-tuple of (offset, data) for the requested range of
1656 Returns a 2-tuple of (offset, data) for the requested range of
1657 revisions. Offset is the integer offset from the beginning of the
1657 revisions. Offset is the integer offset from the beginning of the
1658 revlog and data is a str or buffer of the raw byte data.
1658 revlog and data is a str or buffer of the raw byte data.
1659
1659
1660 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1660 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1661 to determine where each revision's data begins and ends.
1661 to determine where each revision's data begins and ends.
1662 """
1662 """
1663 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1663 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1664 # (functions are expensive).
1664 # (functions are expensive).
1665 index = self.index
1665 index = self.index
1666 istart = index[startrev]
1666 istart = index[startrev]
1667 start = int(istart[0] >> 16)
1667 start = int(istart[0] >> 16)
1668 if startrev == endrev:
1668 if startrev == endrev:
1669 end = start + istart[1]
1669 end = start + istart[1]
1670 else:
1670 else:
1671 iend = index[endrev]
1671 iend = index[endrev]
1672 end = int(iend[0] >> 16) + iend[1]
1672 end = int(iend[0] >> 16) + iend[1]
1673
1673
1674 if self._inline:
1674 if self._inline:
1675 start += (startrev + 1) * self.index.entry_size
1675 start += (startrev + 1) * self.index.entry_size
1676 end += (endrev + 1) * self.index.entry_size
1676 end += (endrev + 1) * self.index.entry_size
1677 length = end - start
1677 length = end - start
1678
1678
1679 return start, self._segmentfile.read_chunk(start, length, df)
1679 return start, self._segmentfile.read_chunk(start, length, df)
1680
1680
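    # Usage sketch (assumed ``rlog``, non-inline case, with
    # startrev <= rev <= endrev): carving one revision's raw chunk out of
    # the returned segment, the way _chunks() below does.
    #
    #     offset, data = rlog._getsegmentforrevs(startrev, endrev)
    #     s = rlog.start(rev) - offset
    #     chunk = data[s : s + rlog.length(rev)]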
1681 def _chunk(self, rev, df=None):
1681 def _chunk(self, rev, df=None):
1682 """Obtain a single decompressed chunk for a revision.
1682 """Obtain a single decompressed chunk for a revision.
1683
1683
1684 Accepts an integer revision and an optional already-open file handle
1684 Accepts an integer revision and an optional already-open file handle
1685 to be used for reading. If used, the seek position of the file will not
1685 to be used for reading. If used, the seek position of the file will not
1686 be preserved.
1686 be preserved.
1687
1687
1688 Returns a str holding uncompressed data for the requested revision.
1688 Returns a str holding uncompressed data for the requested revision.
1689 """
1689 """
1690 compression_mode = self.index[rev][10]
1690 compression_mode = self.index[rev][10]
1691 data = self._getsegmentforrevs(rev, rev, df=df)[1]
1691 data = self._getsegmentforrevs(rev, rev, df=df)[1]
1692 if compression_mode == COMP_MODE_PLAIN:
1692 if compression_mode == COMP_MODE_PLAIN:
1693 return data
1693 return data
1694 elif compression_mode == COMP_MODE_DEFAULT:
1694 elif compression_mode == COMP_MODE_DEFAULT:
1695 return self._decompressor(data)
1695 return self._decompressor(data)
1696 elif compression_mode == COMP_MODE_INLINE:
1696 elif compression_mode == COMP_MODE_INLINE:
1697 return self.decompress(data)
1697 return self.decompress(data)
1698 else:
1698 else:
1699 msg = b'unknown compression mode %d'
1699 msg = b'unknown compression mode %d'
1700 msg %= compression_mode
1700 msg %= compression_mode
1701 raise error.RevlogError(msg)
1701 raise error.RevlogError(msg)
1702
1702
1703 def _chunks(self, revs, df=None, targetsize=None):
1703 def _chunks(self, revs, df=None, targetsize=None):
1704 """Obtain decompressed chunks for the specified revisions.
1704 """Obtain decompressed chunks for the specified revisions.
1705
1705
1706 Accepts an iterable of numeric revisions that are assumed to be in
1706 Accepts an iterable of numeric revisions that are assumed to be in
1707 ascending order. Also accepts an optional already-open file handle
1707 ascending order. Also accepts an optional already-open file handle
1708 to be used for reading. If used, the seek position of the file will
1708 to be used for reading. If used, the seek position of the file will
1709 not be preserved.
1709 not be preserved.
1710
1710
1711 This function is similar to calling ``self._chunk()`` multiple times,
1711 This function is similar to calling ``self._chunk()`` multiple times,
1712 but is faster.
1712 but is faster.
1713
1713
1714 Returns a list with decompressed data for each requested revision.
1714 Returns a list with decompressed data for each requested revision.
1715 """
1715 """
1716 if not revs:
1716 if not revs:
1717 return []
1717 return []
1718 start = self.start
1718 start = self.start
1719 length = self.length
1719 length = self.length
1720 inline = self._inline
1720 inline = self._inline
1721 iosize = self.index.entry_size
1721 iosize = self.index.entry_size
1722 buffer = util.buffer
1722 buffer = util.buffer
1723
1723
1724 l = []
1724 l = []
1725 ladd = l.append
1725 ladd = l.append
1726
1726
1727 if not self._withsparseread:
1727 if not self._withsparseread:
1728 slicedchunks = (revs,)
1728 slicedchunks = (revs,)
1729 else:
1729 else:
1730 slicedchunks = deltautil.slicechunk(
1730 slicedchunks = deltautil.slicechunk(
1731 self, revs, targetsize=targetsize
1731 self, revs, targetsize=targetsize
1732 )
1732 )
1733
1733
1734 for revschunk in slicedchunks:
1734 for revschunk in slicedchunks:
1735 firstrev = revschunk[0]
1735 firstrev = revschunk[0]
1736 # Skip trailing revisions with empty diff
1736 # Skip trailing revisions with empty diff
1737 for lastrev in revschunk[::-1]:
1737 for lastrev in revschunk[::-1]:
1738 if length(lastrev) != 0:
1738 if length(lastrev) != 0:
1739 break
1739 break
1740
1740
1741 try:
1741 try:
1742 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1742 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1743 except OverflowError:
1743 except OverflowError:
1744 # issue4215 - we can't cache a run of chunks greater than
1744 # issue4215 - we can't cache a run of chunks greater than
1745 # 2G on Windows
1745 # 2G on Windows
1746 return [self._chunk(rev, df=df) for rev in revschunk]
1746 return [self._chunk(rev, df=df) for rev in revschunk]
1747
1747
1748 decomp = self.decompress
1748 decomp = self.decompress
1749 # self._decompressor might be None, but will not be used in that case
1749 # self._decompressor might be None, but will not be used in that case
1750 def_decomp = self._decompressor
1750 def_decomp = self._decompressor
1751 for rev in revschunk:
1751 for rev in revschunk:
1752 chunkstart = start(rev)
1752 chunkstart = start(rev)
1753 if inline:
1753 if inline:
1754 chunkstart += (rev + 1) * iosize
1754 chunkstart += (rev + 1) * iosize
1755 chunklength = length(rev)
1755 chunklength = length(rev)
1756 comp_mode = self.index[rev][10]
1756 comp_mode = self.index[rev][10]
1757 c = buffer(data, chunkstart - offset, chunklength)
1757 c = buffer(data, chunkstart - offset, chunklength)
1758 if comp_mode == COMP_MODE_PLAIN:
1758 if comp_mode == COMP_MODE_PLAIN:
1759 ladd(c)
1759 ladd(c)
1760 elif comp_mode == COMP_MODE_INLINE:
1760 elif comp_mode == COMP_MODE_INLINE:
1761 ladd(decomp(c))
1761 ladd(decomp(c))
1762 elif comp_mode == COMP_MODE_DEFAULT:
1762 elif comp_mode == COMP_MODE_DEFAULT:
1763 ladd(def_decomp(c))
1763 ladd(def_decomp(c))
1764 else:
1764 else:
1765 msg = b'unknown compression mode %d'
1765 msg = b'unknown compression mode %d'
1766 msg %= comp_mode
1766 msg %= comp_mode
1767 raise error.RevlogError(msg)
1767 raise error.RevlogError(msg)
1768
1768
1769 return l
1769 return l
1770
1770
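    # Usage sketch (assumed ``rlog`` and an ascending list ``revs``):
    # batched and one-at-a-time decompression agree.
    #
    #     chunks = rlog._chunks(revs)
    #     assert [bytes(c) for c in chunks] == [
    #         bytes(rlog._chunk(r)) for r in revs
    #     ]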
1771 def deltaparent(self, rev):
1771 def deltaparent(self, rev):
1772 """return deltaparent of the given revision"""
1772 """return deltaparent of the given revision"""
1773 base = self.index[rev][3]
1773 base = self.index[rev][3]
1774 if base == rev:
1774 if base == rev:
1775 return nullrev
1775 return nullrev
1776 elif self._generaldelta:
1776 elif self._generaldelta:
1777 return base
1777 return base
1778 else:
1778 else:
1779 return rev - 1
1779 return rev - 1
1780
1780
1781 def issnapshot(self, rev):
1781 def issnapshot(self, rev):
1782 """tells whether rev is a snapshot"""
1782 """tells whether rev is a snapshot"""
1783 if not self._sparserevlog:
1783 if not self._sparserevlog:
1784 return self.deltaparent(rev) == nullrev
1784 return self.deltaparent(rev) == nullrev
1785 elif util.safehasattr(self.index, b'issnapshot'):
1785 elif util.safehasattr(self.index, 'issnapshot'):
1786 # directly assign the method to cache the attribute test and lookup
1786 # directly assign the method to cache the attribute test and lookup
1787 self.issnapshot = self.index.issnapshot
1787 self.issnapshot = self.index.issnapshot
1788 return self.issnapshot(rev)
1788 return self.issnapshot(rev)
1789 if rev == nullrev:
1789 if rev == nullrev:
1790 return True
1790 return True
1791 entry = self.index[rev]
1791 entry = self.index[rev]
1792 base = entry[3]
1792 base = entry[3]
1793 if base == rev:
1793 if base == rev:
1794 return True
1794 return True
1795 if base == nullrev:
1795 if base == nullrev:
1796 return True
1796 return True
1797 p1 = entry[5]
1797 p1 = entry[5]
1798 while self.length(p1) == 0:
1798 while self.length(p1) == 0:
1799 b = self.deltaparent(p1)
1799 b = self.deltaparent(p1)
1800 if b == p1:
1800 if b == p1:
1801 break
1801 break
1802 p1 = b
1802 p1 = b
1803 p2 = entry[6]
1803 p2 = entry[6]
1804 while self.length(p2) == 0:
1804 while self.length(p2) == 0:
1805 b = self.deltaparent(p2)
1805 b = self.deltaparent(p2)
1806 if b == p2:
1806 if b == p2:
1807 break
1807 break
1808 p2 = b
1808 p2 = b
1809 if base == p1 or base == p2:
1809 if base == p1 or base == p2:
1810 return False
1810 return False
1811 return self.issnapshot(base)
1811 return self.issnapshot(base)
1812
1812
1813 def snapshotdepth(self, rev):
1813 def snapshotdepth(self, rev):
1814 """number of snapshot in the chain before this one"""
1814 """number of snapshot in the chain before this one"""
1815 if not self.issnapshot(rev):
1815 if not self.issnapshot(rev):
1816 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
1816 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
1817 return len(self._deltachain(rev)[0]) - 1
1817 return len(self._deltachain(rev)[0]) - 1
1818
1818
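# For intuition, a minimal sketch of the invariant tying the two methods
# above together; `rl` and `rev` are hypothetical stand-ins for an open
# revlog and one of its revisions:
#
#     chain, _stopped = rl._deltachain(rev)
#     if rl.issnapshot(rev):
#         # every link of a snapshot's delta chain is itself a snapshot
#         assert all(rl.issnapshot(r) for r in chain)
#         assert rl.snapshotdepth(rev) == len(chain) - 1
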
1819 def revdiff(self, rev1, rev2):
1819 def revdiff(self, rev1, rev2):
1820 """return or calculate a delta between two revisions
1820 """return or calculate a delta between two revisions
1821
1821
1822 The delta calculated is in binary form and is intended to be written to
1822 The delta calculated is in binary form and is intended to be written to
1823 revlog data directly. So this function needs raw revision data.
1823 revlog data directly. So this function needs raw revision data.
1824 """
1824 """
1825 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1825 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1826 return bytes(self._chunk(rev2))
1826 return bytes(self._chunk(rev2))
1827
1827
1828 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1828 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1829
1829
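# When rev1 is rev2's delta parent, revdiff() can hand back the stored
# chunk untouched instead of reconstructing both texts; a sketch with a
# hypothetical revlog `rl`:
#
#     if rev1 != nullrev and rl.deltaparent(rev2) == rev1:
#         delta = rl.revdiff(rev1, rev2)  # cheap: one chunk read, no patching
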
1830 def revision(self, nodeorrev, _df=None):
1830 def revision(self, nodeorrev, _df=None):
1831 """return an uncompressed revision of a given node or revision
1831 """return an uncompressed revision of a given node or revision
1832 number.
1832 number.
1833
1833
1834 _df - an existing file handle to read from. (internal-only)
1834 _df - an existing file handle to read from. (internal-only)
1835 """
1835 """
1836 return self._revisiondata(nodeorrev, _df)
1836 return self._revisiondata(nodeorrev, _df)
1837
1837
1838 def sidedata(self, nodeorrev, _df=None):
1838 def sidedata(self, nodeorrev, _df=None):
1839 """a map of extra data related to the changeset but not part of the hash
1839 """a map of extra data related to the changeset but not part of the hash
1840
1840
1841 This function currently returns a dictionary. However, a more advanced
1841 This function currently returns a dictionary. However, a more advanced
1842 mapping object will likely be used in the future for more
1842 mapping object will likely be used in the future for more
1843 efficient/lazy code.
1843 efficient/lazy code.
1844 """
1844 """
1845 # deal with <nodeorrev> argument type
1845 # deal with <nodeorrev> argument type
1846 if isinstance(nodeorrev, int):
1846 if isinstance(nodeorrev, int):
1847 rev = nodeorrev
1847 rev = nodeorrev
1848 else:
1848 else:
1849 rev = self.rev(nodeorrev)
1849 rev = self.rev(nodeorrev)
1850 return self._sidedata(rev)
1850 return self._sidedata(rev)
1851
1851
1852 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1852 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1853 # deal with <nodeorrev> argument type
1853 # deal with <nodeorrev> argument type
1854 if isinstance(nodeorrev, int):
1854 if isinstance(nodeorrev, int):
1855 rev = nodeorrev
1855 rev = nodeorrev
1856 node = self.node(rev)
1856 node = self.node(rev)
1857 else:
1857 else:
1858 node = nodeorrev
1858 node = nodeorrev
1859 rev = None
1859 rev = None
1860
1860
1861 # fast path the special `nullid` rev
1861 # fast path the special `nullid` rev
1862 if node == self.nullid:
1862 if node == self.nullid:
1863 return b""
1863 return b""
1864
1864
1865 # ``rawtext`` is the text as stored inside the revlog. Might be the
1865 # ``rawtext`` is the text as stored inside the revlog. Might be the
1866 # revision or might need to be processed to retrieve the revision.
1866 # revision or might need to be processed to retrieve the revision.
1867 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1867 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1868
1868
1869 if raw and validated:
1869 if raw and validated:
1870 # if we don't want to process the raw text and that raw
1870 # if we don't want to process the raw text and that raw
1871 # text is cached, we can exit early.
1871 # text is cached, we can exit early.
1872 return rawtext
1872 return rawtext
1873 if rev is None:
1873 if rev is None:
1874 rev = self.rev(node)
1874 rev = self.rev(node)
1875 # the revlog's flags for this revision
1875 # the revlog's flags for this revision
1876 # (they usually alter its state or content)
1876 # (they usually alter its state or content)
1877 flags = self.flags(rev)
1877 flags = self.flags(rev)
1878
1878
1879 if validated and flags == REVIDX_DEFAULT_FLAGS:
1879 if validated and flags == REVIDX_DEFAULT_FLAGS:
1880 # no extra flags set, no flag processor runs, text = rawtext
1880 # no extra flags set, no flag processor runs, text = rawtext
1881 return rawtext
1881 return rawtext
1882
1882
1883 if raw:
1883 if raw:
1884 validatehash = flagutil.processflagsraw(self, rawtext, flags)
1884 validatehash = flagutil.processflagsraw(self, rawtext, flags)
1885 text = rawtext
1885 text = rawtext
1886 else:
1886 else:
1887 r = flagutil.processflagsread(self, rawtext, flags)
1887 r = flagutil.processflagsread(self, rawtext, flags)
1888 text, validatehash = r
1888 text, validatehash = r
1889 if validatehash:
1889 if validatehash:
1890 self.checkhash(text, node, rev=rev)
1890 self.checkhash(text, node, rev=rev)
1891 if not validated:
1891 if not validated:
1892 self._revisioncache = (node, rev, rawtext)
1892 self._revisioncache = (node, rev, rawtext)
1893
1893
1894 return text
1894 return text
1895
1895
1896 def _rawtext(self, node, rev, _df=None):
1896 def _rawtext(self, node, rev, _df=None):
1897 """return the possibly unvalidated rawtext for a revision
1897 """return the possibly unvalidated rawtext for a revision
1898
1898
1899 returns (rev, rawtext, validated)
1899 returns (rev, rawtext, validated)
1900 """
1900 """
1901
1901
1902 # revision in the cache (could be useful to apply delta)
1902 # revision in the cache (could be useful to apply delta)
1903 cachedrev = None
1903 cachedrev = None
1904 # An intermediate text to apply deltas to
1904 # An intermediate text to apply deltas to
1905 basetext = None
1905 basetext = None
1906
1906
1907 # Check if we have the entry in cache
1907 # Check if we have the entry in cache
1908 # The cache entry looks like (node, rev, rawtext)
1908 # The cache entry looks like (node, rev, rawtext)
1909 if self._revisioncache:
1909 if self._revisioncache:
1910 if self._revisioncache[0] == node:
1910 if self._revisioncache[0] == node:
1911 return (rev, self._revisioncache[2], True)
1911 return (rev, self._revisioncache[2], True)
1912 cachedrev = self._revisioncache[1]
1912 cachedrev = self._revisioncache[1]
1913
1913
1914 if rev is None:
1914 if rev is None:
1915 rev = self.rev(node)
1915 rev = self.rev(node)
1916
1916
1917 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1917 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1918 if stopped:
1918 if stopped:
1919 basetext = self._revisioncache[2]
1919 basetext = self._revisioncache[2]
1920
1920
1921 # drop cache to save memory, the caller is expected to
1921 # drop cache to save memory, the caller is expected to
1922 # update self._revisioncache after validating the text
1922 # update self._revisioncache after validating the text
1923 self._revisioncache = None
1923 self._revisioncache = None
1924
1924
1925 targetsize = None
1925 targetsize = None
1926 rawsize = self.index[rev][2]
1926 rawsize = self.index[rev][2]
1927 if 0 <= rawsize:
1927 if 0 <= rawsize:
1928 targetsize = 4 * rawsize
1928 targetsize = 4 * rawsize
1929
1929
1930 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1930 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1931 if basetext is None:
1931 if basetext is None:
1932 basetext = bytes(bins[0])
1932 basetext = bytes(bins[0])
1933 bins = bins[1:]
1933 bins = bins[1:]
1934
1934
1935 rawtext = mdiff.patches(basetext, bins)
1935 rawtext = mdiff.patches(basetext, bins)
1936 del basetext # let us have a chance to free memory early
1936 del basetext # let us have a chance to free memory early
1937 return (rev, rawtext, False)
1937 return (rev, rawtext, False)
1938
1938
1939 def _sidedata(self, rev):
1939 def _sidedata(self, rev):
1940 """Return the sidedata for a given revision number."""
1940 """Return the sidedata for a given revision number."""
1941 index_entry = self.index[rev]
1941 index_entry = self.index[rev]
1942 sidedata_offset = index_entry[8]
1942 sidedata_offset = index_entry[8]
1943 sidedata_size = index_entry[9]
1943 sidedata_size = index_entry[9]
1944
1944
1945 if self._inline:
1945 if self._inline:
1946 sidedata_offset += self.index.entry_size * (1 + rev)
1946 sidedata_offset += self.index.entry_size * (1 + rev)
1947 if sidedata_size == 0:
1947 if sidedata_size == 0:
1948 return {}
1948 return {}
1949
1949
1950 if self._docket.sidedata_end < sidedata_offset + sidedata_size:
1950 if self._docket.sidedata_end < sidedata_offset + sidedata_size:
1951 filename = self._sidedatafile
1951 filename = self._sidedatafile
1952 end = self._docket.sidedata_end
1952 end = self._docket.sidedata_end
1953 offset = sidedata_offset
1953 offset = sidedata_offset
1954 length = sidedata_size
1954 length = sidedata_size
1955 m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
1955 m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
1956 raise error.RevlogError(m)
1956 raise error.RevlogError(m)
1957
1957
1958 comp_segment = self._segmentfile_sidedata.read_chunk(
1958 comp_segment = self._segmentfile_sidedata.read_chunk(
1959 sidedata_offset, sidedata_size
1959 sidedata_offset, sidedata_size
1960 )
1960 )
1961
1961
1962 comp = self.index[rev][11]
1962 comp = self.index[rev][11]
1963 if comp == COMP_MODE_PLAIN:
1963 if comp == COMP_MODE_PLAIN:
1964 segment = comp_segment
1964 segment = comp_segment
1965 elif comp == COMP_MODE_DEFAULT:
1965 elif comp == COMP_MODE_DEFAULT:
1966 segment = self._decompressor(comp_segment)
1966 segment = self._decompressor(comp_segment)
1967 elif comp == COMP_MODE_INLINE:
1967 elif comp == COMP_MODE_INLINE:
1968 segment = self.decompress(comp_segment)
1968 segment = self.decompress(comp_segment)
1969 else:
1969 else:
1970 msg = b'unknown compression mode %d'
1970 msg = b'unknown compression mode %d'
1971 msg %= comp
1971 msg %= comp
1972 raise error.RevlogError(msg)
1972 raise error.RevlogError(msg)
1973
1973
1974 sidedata = sidedatautil.deserialize_sidedata(segment)
1974 sidedata = sidedatautil.deserialize_sidedata(segment)
1975 return sidedata
1975 return sidedata
1976
1976
1977 def rawdata(self, nodeorrev, _df=None):
1977 def rawdata(self, nodeorrev, _df=None):
1978 """return an uncompressed raw data of a given node or revision number.
1978 """return an uncompressed raw data of a given node or revision number.
1979
1979
1980 _df - an existing file handle to read from. (internal-only)
1980 _df - an existing file handle to read from. (internal-only)
1981 """
1981 """
1982 return self._revisiondata(nodeorrev, _df, raw=True)
1982 return self._revisiondata(nodeorrev, _df, raw=True)
1983
1983
1984 def hash(self, text, p1, p2):
1984 def hash(self, text, p1, p2):
1985 """Compute a node hash.
1985 """Compute a node hash.
1986
1986
1987 Available as a function so that subclasses can replace the hash
1987 Available as a function so that subclasses can replace the hash
1988 as needed.
1988 as needed.
1989 """
1989 """
1990 return storageutil.hashrevisionsha1(text, p1, p2)
1990 return storageutil.hashrevisionsha1(text, p1, p2)
1991
1991
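# A minimal sketch of what the default SHA-1 node hash computes (the
# parents are sorted before hashing, so parent order does not change the
# node):
#
#     import hashlib
#
#     def sha1_node(text, p1, p2):
#         lo, hi = sorted([p1, p2])
#         return hashlib.sha1(lo + hi + text).digest()
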
1992 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1992 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1993 """Check node hash integrity.
1993 """Check node hash integrity.
1994
1994
1995 Available as a function so that subclasses can extend hash mismatch
1995 Available as a function so that subclasses can extend hash mismatch
1996 behaviors as needed.
1996 behaviors as needed.
1997 """
1997 """
1998 try:
1998 try:
1999 if p1 is None and p2 is None:
1999 if p1 is None and p2 is None:
2000 p1, p2 = self.parents(node)
2000 p1, p2 = self.parents(node)
2001 if node != self.hash(text, p1, p2):
2001 if node != self.hash(text, p1, p2):
2002 # Clear the revision cache on hash failure. The revision cache
2002 # Clear the revision cache on hash failure. The revision cache
2003 # only stores the raw revision and clearing the cache does have
2003 # only stores the raw revision and clearing the cache does have
2004 # the side-effect that we won't have a cache hit when the raw
2004 # the side-effect that we won't have a cache hit when the raw
2005 # revision data is accessed. But this case should be rare and
2005 # revision data is accessed. But this case should be rare and
2006 # it is extra work to teach the cache about the hash
2006 # it is extra work to teach the cache about the hash
2007 # verification state.
2007 # verification state.
2008 if self._revisioncache and self._revisioncache[0] == node:
2008 if self._revisioncache and self._revisioncache[0] == node:
2009 self._revisioncache = None
2009 self._revisioncache = None
2010
2010
2011 revornode = rev
2011 revornode = rev
2012 if revornode is None:
2012 if revornode is None:
2013 revornode = templatefilters.short(hex(node))
2013 revornode = templatefilters.short(hex(node))
2014 raise error.RevlogError(
2014 raise error.RevlogError(
2015 _(b"integrity check failed on %s:%s")
2015 _(b"integrity check failed on %s:%s")
2016 % (self.display_id, pycompat.bytestr(revornode))
2016 % (self.display_id, pycompat.bytestr(revornode))
2017 )
2017 )
2018 except error.RevlogError:
2018 except error.RevlogError:
2019 if self._censorable and storageutil.iscensoredtext(text):
2019 if self._censorable and storageutil.iscensoredtext(text):
2020 raise error.CensoredNodeError(self.display_id, node, text)
2020 raise error.CensoredNodeError(self.display_id, node, text)
2021 raise
2021 raise
2022
2022
2023 def _enforceinlinesize(self, tr, side_write=True):
2023 def _enforceinlinesize(self, tr, side_write=True):
2024 """Check if the revlog is too big for inline and convert if so.
2024 """Check if the revlog is too big for inline and convert if so.
2025
2025
2026 This should be called after revisions are added to the revlog. If the
2026 This should be called after revisions are added to the revlog. If the
2027 revlog has grown too large to be an inline revlog, it will be converted
2027 revlog has grown too large to be an inline revlog, it will be converted
2028 to use multiple index and data files.
2028 to use multiple index and data files.
2029 """
2029 """
2030 tiprev = len(self) - 1
2030 tiprev = len(self) - 1
2031 total_size = self.start(tiprev) + self.length(tiprev)
2031 total_size = self.start(tiprev) + self.length(tiprev)
2032 if not self._inline or total_size < _maxinline:
2032 if not self._inline or total_size < _maxinline:
2033 return
2033 return
2034
2034
2035 troffset = tr.findoffset(self._indexfile)
2035 troffset = tr.findoffset(self._indexfile)
2036 if troffset is None:
2036 if troffset is None:
2037 raise error.RevlogError(
2037 raise error.RevlogError(
2038 _(b"%s not found in the transaction") % self._indexfile
2038 _(b"%s not found in the transaction") % self._indexfile
2039 )
2039 )
2040 if troffset:
2040 if troffset:
2041 tr.addbackup(self._indexfile, for_offset=True)
2041 tr.addbackup(self._indexfile, for_offset=True)
2042 tr.add(self._datafile, 0)
2042 tr.add(self._datafile, 0)
2043
2043
2044 existing_handles = False
2044 existing_handles = False
2045 if self._writinghandles is not None:
2045 if self._writinghandles is not None:
2046 existing_handles = True
2046 existing_handles = True
2047 fp = self._writinghandles[0]
2047 fp = self._writinghandles[0]
2048 fp.flush()
2048 fp.flush()
2049 fp.close()
2049 fp.close()
2050 # We can't use the cached file handle after close(). So prevent
2050 # We can't use the cached file handle after close(). So prevent
2051 # its usage.
2051 # its usage.
2052 self._writinghandles = None
2052 self._writinghandles = None
2053 self._segmentfile.writing_handle = None
2053 self._segmentfile.writing_handle = None
2054 # No need to deal with sidedata writing handle as it is only
2054 # No need to deal with sidedata writing handle as it is only
2055 # relevant with revlog-v2 which is never inline, not reaching
2055 # relevant with revlog-v2 which is never inline, not reaching
2056 # this code
2056 # this code
2057 if side_write:
2057 if side_write:
2058 old_index_file_path = self._indexfile
2058 old_index_file_path = self._indexfile
2059 new_index_file_path = self._indexfile + b'.s'
2059 new_index_file_path = self._indexfile + b'.s'
2060 opener = self.opener
2060 opener = self.opener
2061 weak_self = weakref.ref(self)
2061 weak_self = weakref.ref(self)
2062
2062
2063 # the "split" index replace the real index when the transaction is finalized
2063 # the "split" index replace the real index when the transaction is finalized
2064 def finalize_callback(tr):
2064 def finalize_callback(tr):
2065 opener.rename(
2065 opener.rename(
2066 new_index_file_path,
2066 new_index_file_path,
2067 old_index_file_path,
2067 old_index_file_path,
2068 checkambig=True,
2068 checkambig=True,
2069 )
2069 )
2070 maybe_self = weak_self()
2070 maybe_self = weak_self()
2071 if maybe_self is not None:
2071 if maybe_self is not None:
2072 maybe_self._indexfile = old_index_file_path
2072 maybe_self._indexfile = old_index_file_path
2073
2073
2074 def abort_callback(tr):
2074 def abort_callback(tr):
2075 maybe_self = weak_self()
2075 maybe_self = weak_self()
2076 if maybe_self is not None:
2076 if maybe_self is not None:
2077 maybe_self._indexfile = old_index_file_path
2077 maybe_self._indexfile = old_index_file_path
2078
2078
2079 tr.registertmp(new_index_file_path)
2079 tr.registertmp(new_index_file_path)
2080 if self.target[1] is not None:
2080 if self.target[1] is not None:
2081 callback_id = b'000-revlog-split-%d-%s' % self.target
2081 callback_id = b'000-revlog-split-%d-%s' % self.target
2082 else:
2082 else:
2083 callback_id = b'000-revlog-split-%d' % self.target[0]
2083 callback_id = b'000-revlog-split-%d' % self.target[0]
2084 tr.addfinalize(callback_id, finalize_callback)
2084 tr.addfinalize(callback_id, finalize_callback)
2085 tr.addabort(callback_id, abort_callback)
2085 tr.addabort(callback_id, abort_callback)
2086
2086
2087 new_dfh = self._datafp(b'w+')
2087 new_dfh = self._datafp(b'w+')
2088 new_dfh.truncate(0) # drop any potentially existing data
2088 new_dfh.truncate(0) # drop any potentially existing data
2089 try:
2089 try:
2090 with self._indexfp() as read_ifh:
2090 with self._indexfp() as read_ifh:
2091 for r in self:
2091 for r in self:
2092 new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
2092 new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
2093 new_dfh.flush()
2093 new_dfh.flush()
2094
2094
2095 if side_write:
2095 if side_write:
2096 self._indexfile = new_index_file_path
2096 self._indexfile = new_index_file_path
2097 with self.__index_new_fp() as fp:
2097 with self.__index_new_fp() as fp:
2098 self._format_flags &= ~FLAG_INLINE_DATA
2098 self._format_flags &= ~FLAG_INLINE_DATA
2099 self._inline = False
2099 self._inline = False
2100 for i in self:
2100 for i in self:
2101 e = self.index.entry_binary(i)
2101 e = self.index.entry_binary(i)
2102 if i == 0 and self._docket is None:
2102 if i == 0 and self._docket is None:
2103 header = self._format_flags | self._format_version
2103 header = self._format_flags | self._format_version
2104 header = self.index.pack_header(header)
2104 header = self.index.pack_header(header)
2105 e = header + e
2105 e = header + e
2106 fp.write(e)
2106 fp.write(e)
2107 if self._docket is not None:
2107 if self._docket is not None:
2108 self._docket.index_end = fp.tell()
2108 self._docket.index_end = fp.tell()
2109
2109
2110 # If we don't use side-write, the temp file replaces the real
2110 # If we don't use side-write, the temp file replaces the real
2111 # index when we exit the context manager
2111 # index when we exit the context manager
2112
2112
2113 nodemaputil.setup_persistent_nodemap(tr, self)
2113 nodemaputil.setup_persistent_nodemap(tr, self)
2114 self._segmentfile = randomaccessfile.randomaccessfile(
2114 self._segmentfile = randomaccessfile.randomaccessfile(
2115 self.opener,
2115 self.opener,
2116 self._datafile,
2116 self._datafile,
2117 self._chunkcachesize,
2117 self._chunkcachesize,
2118 )
2118 )
2119
2119
2120 if existing_handles:
2120 if existing_handles:
2121 # switched from inline to conventional reopen the index
2121 # switched from inline to conventional reopen the index
2122 ifh = self.__index_write_fp()
2122 ifh = self.__index_write_fp()
2123 self._writinghandles = (ifh, new_dfh, None)
2123 self._writinghandles = (ifh, new_dfh, None)
2124 self._segmentfile.writing_handle = new_dfh
2124 self._segmentfile.writing_handle = new_dfh
2125 new_dfh = None
2125 new_dfh = None
2126 # No need to deal with sidedata writing handle as it is only
2126 # No need to deal with sidedata writing handle as it is only
2127 # relevant with revlog-v2 which is never inline, not reaching
2127 # relevant with revlog-v2 which is never inline, not reaching
2128 # this code
2128 # this code
2129 finally:
2129 finally:
2130 if new_dfh is not None:
2130 if new_dfh is not None:
2131 new_dfh.close()
2131 new_dfh.close()
2132
2132
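# Roughly, the conversion above goes from one interleaved file to two
# flat ones:
#
#     inline:  .i = [entry 0][data 0][entry 1][data 1] ...
#     split:   .i = [entry 0][entry 1] ...    .d = [data 0][data 1] ...
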
2133 def _nodeduplicatecallback(self, transaction, node):
2133 def _nodeduplicatecallback(self, transaction, node):
2134 """called when trying to add a node already stored."""
2134 """called when trying to add a node already stored."""
2135
2135
2136 @contextlib.contextmanager
2136 @contextlib.contextmanager
2137 def reading(self):
2137 def reading(self):
2138 """Context manager that keeps data and sidedata files open for reading"""
2138 """Context manager that keeps data and sidedata files open for reading"""
2139 with self._segmentfile.reading():
2139 with self._segmentfile.reading():
2140 with self._segmentfile_sidedata.reading():
2140 with self._segmentfile_sidedata.reading():
2141 yield
2141 yield
2142
2142
2143 @contextlib.contextmanager
2143 @contextlib.contextmanager
2144 def _writing(self, transaction):
2144 def _writing(self, transaction):
2145 if self._trypending:
2145 if self._trypending:
2146 msg = b'trying to write in a `trypending` revlog: %s'
2146 msg = b'trying to write in a `trypending` revlog: %s'
2147 msg %= self.display_id
2147 msg %= self.display_id
2148 raise error.ProgrammingError(msg)
2148 raise error.ProgrammingError(msg)
2149 if self._writinghandles is not None:
2149 if self._writinghandles is not None:
2150 yield
2150 yield
2151 else:
2151 else:
2152 ifh = dfh = sdfh = None
2152 ifh = dfh = sdfh = None
2153 try:
2153 try:
2154 r = len(self)
2154 r = len(self)
2155 # opening the data file.
2155 # opening the data file.
2156 dsize = 0
2156 dsize = 0
2157 if r:
2157 if r:
2158 dsize = self.end(r - 1)
2158 dsize = self.end(r - 1)
2159 dfh = None
2159 dfh = None
2160 if not self._inline:
2160 if not self._inline:
2161 try:
2161 try:
2162 dfh = self._datafp(b"r+")
2162 dfh = self._datafp(b"r+")
2163 if self._docket is None:
2163 if self._docket is None:
2164 dfh.seek(0, os.SEEK_END)
2164 dfh.seek(0, os.SEEK_END)
2165 else:
2165 else:
2166 dfh.seek(self._docket.data_end, os.SEEK_SET)
2166 dfh.seek(self._docket.data_end, os.SEEK_SET)
2167 except FileNotFoundError:
2167 except FileNotFoundError:
2168 dfh = self._datafp(b"w+")
2168 dfh = self._datafp(b"w+")
2169 transaction.add(self._datafile, dsize)
2169 transaction.add(self._datafile, dsize)
2170 if self._sidedatafile is not None:
2170 if self._sidedatafile is not None:
2171 # revlog-v2 does not inline, help Pytype
2171 # revlog-v2 does not inline, help Pytype
2172 assert dfh is not None
2172 assert dfh is not None
2173 try:
2173 try:
2174 sdfh = self.opener(self._sidedatafile, mode=b"r+")
2174 sdfh = self.opener(self._sidedatafile, mode=b"r+")
2175 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
2175 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
2176 except FileNotFoundError:
2176 except FileNotFoundError:
2177 sdfh = self.opener(self._sidedatafile, mode=b"w+")
2177 sdfh = self.opener(self._sidedatafile, mode=b"w+")
2178 transaction.add(
2178 transaction.add(
2179 self._sidedatafile, self._docket.sidedata_end
2179 self._sidedatafile, self._docket.sidedata_end
2180 )
2180 )
2181
2181
2182 # opening the index file.
2182 # opening the index file.
2183 isize = r * self.index.entry_size
2183 isize = r * self.index.entry_size
2184 ifh = self.__index_write_fp()
2184 ifh = self.__index_write_fp()
2185 if self._inline:
2185 if self._inline:
2186 transaction.add(self._indexfile, dsize + isize)
2186 transaction.add(self._indexfile, dsize + isize)
2187 else:
2187 else:
2188 transaction.add(self._indexfile, isize)
2188 transaction.add(self._indexfile, isize)
2189 # expose all file handles for writing.
2189 # expose all file handles for writing.
2190 self._writinghandles = (ifh, dfh, sdfh)
2190 self._writinghandles = (ifh, dfh, sdfh)
2191 self._segmentfile.writing_handle = ifh if self._inline else dfh
2191 self._segmentfile.writing_handle = ifh if self._inline else dfh
2192 self._segmentfile_sidedata.writing_handle = sdfh
2192 self._segmentfile_sidedata.writing_handle = sdfh
2193 yield
2193 yield
2194 if self._docket is not None:
2194 if self._docket is not None:
2195 self._write_docket(transaction)
2195 self._write_docket(transaction)
2196 finally:
2196 finally:
2197 self._writinghandles = None
2197 self._writinghandles = None
2198 self._segmentfile.writing_handle = None
2198 self._segmentfile.writing_handle = None
2199 self._segmentfile_sidedata.writing_handle = None
2199 self._segmentfile_sidedata.writing_handle = None
2200 if dfh is not None:
2200 if dfh is not None:
2201 dfh.close()
2201 dfh.close()
2202 if sdfh is not None:
2202 if sdfh is not None:
2203 sdfh.close()
2203 sdfh.close()
2204 # close the index file last to avoid exposing a reference to
2204 # close the index file last to avoid exposing a reference to
2205 # potentially unflushed data content.
2205 # potentially unflushed data content.
2206 if ifh is not None:
2206 if ifh is not None:
2207 ifh.close()
2207 ifh.close()
2208
2208
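# Typical usage, as in addrawrevision() below: every mutation happens
# inside this context manager so the index/data/sidedata handles are
# opened, positioned and closed consistently:
#
#     with self._writing(transaction):
#         self._addrevision(node, rawtext, transaction, link, p1, p2,
#                           flags, cachedelta)
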
2209 def _write_docket(self, transaction):
2209 def _write_docket(self, transaction):
2210 """write the current docket on disk
2210 """write the current docket on disk
2211
2211
2212 Exists as a method to help the changelog implement its transaction logic
2212 Exists as a method to help the changelog implement its transaction logic
2213
2213
2214 We could also imagine using the same transaction logic for all revlogs
2214 We could also imagine using the same transaction logic for all revlogs
2215 since dockets are cheap."""
2215 since dockets are cheap."""
2216 self._docket.write(transaction)
2216 self._docket.write(transaction)
2217
2217
2218 def addrevision(
2218 def addrevision(
2219 self,
2219 self,
2220 text,
2220 text,
2221 transaction,
2221 transaction,
2222 link,
2222 link,
2223 p1,
2223 p1,
2224 p2,
2224 p2,
2225 cachedelta=None,
2225 cachedelta=None,
2226 node=None,
2226 node=None,
2227 flags=REVIDX_DEFAULT_FLAGS,
2227 flags=REVIDX_DEFAULT_FLAGS,
2228 deltacomputer=None,
2228 deltacomputer=None,
2229 sidedata=None,
2229 sidedata=None,
2230 ):
2230 ):
2231 """add a revision to the log
2231 """add a revision to the log
2232
2232
2233 text - the revision data to add
2233 text - the revision data to add
2234 transaction - the transaction object used for rollback
2234 transaction - the transaction object used for rollback
2235 link - the linkrev data to add
2235 link - the linkrev data to add
2236 p1, p2 - the parent nodeids of the revision
2236 p1, p2 - the parent nodeids of the revision
2237 cachedelta - an optional precomputed delta
2237 cachedelta - an optional precomputed delta
2238 node - nodeid of revision; typically node is not specified, and it is
2238 node - nodeid of revision; typically node is not specified, and it is
2239 computed by default as hash(text, p1, p2); however, subclasses might
2239 computed by default as hash(text, p1, p2); however, subclasses might
2240 use a different hashing method (and override checkhash() in that case)
2240 use a different hashing method (and override checkhash() in that case)
2241 flags - the known flags to set on the revision
2241 flags - the known flags to set on the revision
2242 deltacomputer - an optional deltacomputer instance shared between
2242 deltacomputer - an optional deltacomputer instance shared between
2243 multiple calls
2243 multiple calls
2244 """
2244 """
2245 if link == nullrev:
2245 if link == nullrev:
2246 raise error.RevlogError(
2246 raise error.RevlogError(
2247 _(b"attempted to add linkrev -1 to %s") % self.display_id
2247 _(b"attempted to add linkrev -1 to %s") % self.display_id
2248 )
2248 )
2249
2249
2250 if sidedata is None:
2250 if sidedata is None:
2251 sidedata = {}
2251 sidedata = {}
2252 elif sidedata and not self.hassidedata:
2252 elif sidedata and not self.hassidedata:
2253 raise error.ProgrammingError(
2253 raise error.ProgrammingError(
2254 _(b"trying to add sidedata to a revlog who don't support them")
2254 _(b"trying to add sidedata to a revlog who don't support them")
2255 )
2255 )
2256
2256
2257 if flags:
2257 if flags:
2258 node = node or self.hash(text, p1, p2)
2258 node = node or self.hash(text, p1, p2)
2259
2259
2260 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2260 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2261
2261
2262 # If the flag processor modifies the revision data, ignore any provided
2262 # If the flag processor modifies the revision data, ignore any provided
2263 # cachedelta.
2263 # cachedelta.
2264 if rawtext != text:
2264 if rawtext != text:
2265 cachedelta = None
2265 cachedelta = None
2266
2266
2267 if len(rawtext) > _maxentrysize:
2267 if len(rawtext) > _maxentrysize:
2268 raise error.RevlogError(
2268 raise error.RevlogError(
2269 _(
2269 _(
2270 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2270 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2271 )
2271 )
2272 % (self.display_id, len(rawtext))
2272 % (self.display_id, len(rawtext))
2273 )
2273 )
2274
2274
2275 node = node or self.hash(rawtext, p1, p2)
2275 node = node or self.hash(rawtext, p1, p2)
2276 rev = self.index.get_rev(node)
2276 rev = self.index.get_rev(node)
2277 if rev is not None:
2277 if rev is not None:
2278 return rev
2278 return rev
2279
2279
2280 if validatehash:
2280 if validatehash:
2281 self.checkhash(rawtext, node, p1=p1, p2=p2)
2281 self.checkhash(rawtext, node, p1=p1, p2=p2)
2282
2282
2283 return self.addrawrevision(
2283 return self.addrawrevision(
2284 rawtext,
2284 rawtext,
2285 transaction,
2285 transaction,
2286 link,
2286 link,
2287 p1,
2287 p1,
2288 p2,
2288 p2,
2289 node,
2289 node,
2290 flags,
2290 flags,
2291 cachedelta=cachedelta,
2291 cachedelta=cachedelta,
2292 deltacomputer=deltacomputer,
2292 deltacomputer=deltacomputer,
2293 sidedata=sidedata,
2293 sidedata=sidedata,
2294 )
2294 )
2295
2295
2296 def addrawrevision(
2296 def addrawrevision(
2297 self,
2297 self,
2298 rawtext,
2298 rawtext,
2299 transaction,
2299 transaction,
2300 link,
2300 link,
2301 p1,
2301 p1,
2302 p2,
2302 p2,
2303 node,
2303 node,
2304 flags,
2304 flags,
2305 cachedelta=None,
2305 cachedelta=None,
2306 deltacomputer=None,
2306 deltacomputer=None,
2307 sidedata=None,
2307 sidedata=None,
2308 ):
2308 ):
2309 """add a raw revision with known flags, node and parents
2309 """add a raw revision with known flags, node and parents
2310 useful when reusing a revision not stored in this revlog (e.g. received
2310 useful when reusing a revision not stored in this revlog (e.g. received
2311 over the wire, or read from an external bundle).
2311 over the wire, or read from an external bundle).
2312 """
2312 """
2313 with self._writing(transaction):
2313 with self._writing(transaction):
2314 return self._addrevision(
2314 return self._addrevision(
2315 node,
2315 node,
2316 rawtext,
2316 rawtext,
2317 transaction,
2317 transaction,
2318 link,
2318 link,
2319 p1,
2319 p1,
2320 p2,
2320 p2,
2321 flags,
2321 flags,
2322 cachedelta,
2322 cachedelta,
2323 deltacomputer=deltacomputer,
2323 deltacomputer=deltacomputer,
2324 sidedata=sidedata,
2324 sidedata=sidedata,
2325 )
2325 )
2326
2326
2327 def compress(self, data):
2327 def compress(self, data):
2328 """Generate a possibly-compressed representation of data."""
2328 """Generate a possibly-compressed representation of data."""
2329 if not data:
2329 if not data:
2330 return b'', data
2330 return b'', data
2331
2331
2332 compressed = self._compressor.compress(data)
2332 compressed = self._compressor.compress(data)
2333
2333
2334 if compressed:
2334 if compressed:
2335 # The revlog compressor added the header in the returned data.
2335 # The revlog compressor added the header in the returned data.
2336 return b'', compressed
2336 return b'', compressed
2337
2337
2338 if data[0:1] == b'\0':
2338 if data[0:1] == b'\0':
2339 return b'', data
2339 return b'', data
2340 return b'u', data
2340 return b'u', data
2341
2341
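# compress() returns a (header, data) pair: an empty header means the
# compressor embedded its own header in `data`, while b'u' marks literal
# uncompressed text. A sketch of the round trip (hypothetical revlog `rl`):
#
#     header, data = rl.compress(b'some revision text')
#     stored = header + data  # what actually lands in the data file
#     assert bytes(rl.decompress(stored)) == b'some revision text'
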
2342 def decompress(self, data):
2342 def decompress(self, data):
2343 """Decompress a revlog chunk.
2343 """Decompress a revlog chunk.
2344
2344
2345 The chunk is expected to begin with a header identifying the
2345 The chunk is expected to begin with a header identifying the
2346 format type so it can be routed to an appropriate decompressor.
2346 format type so it can be routed to an appropriate decompressor.
2347 """
2347 """
2348 if not data:
2348 if not data:
2349 return data
2349 return data
2350
2350
2351 # Revlogs are read much more frequently than they are written and many
2351 # Revlogs are read much more frequently than they are written and many
2352 # chunks only take microseconds to decompress, so performance is
2352 # chunks only take microseconds to decompress, so performance is
2353 # important here.
2353 # important here.
2354 #
2354 #
2355 # We can make a few assumptions about revlogs:
2355 # We can make a few assumptions about revlogs:
2356 #
2356 #
2357 # 1) the majority of chunks will be compressed (as opposed to inline
2357 # 1) the majority of chunks will be compressed (as opposed to inline
2358 # raw data).
2358 # raw data).
2359 # 2) decompressing *any* data will likely be at least 10x slower than
2359 # 2) decompressing *any* data will likely be at least 10x slower than
2360 # returning raw inline data.
2360 # returning raw inline data.
2361 # 3) we want to prioritize common and officially supported compression
2361 # 3) we want to prioritize common and officially supported compression
2362 # engines
2362 # engines
2363 #
2363 #
2364 # It follows that we want to optimize for "decompress compressed data
2364 # It follows that we want to optimize for "decompress compressed data
2365 # when encoded with common and officially supported compression engines"
2365 # when encoded with common and officially supported compression engines"
2366 # case over "raw data" and "data encoded by less common or non-official
2366 # case over "raw data" and "data encoded by less common or non-official
2367 # compression engines." That is why we have the inline lookup first
2367 # compression engines." That is why we have the inline lookup first
2368 # followed by the compengines lookup.
2368 # followed by the compengines lookup.
2369 #
2369 #
2370 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2370 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2371 # compressed chunks. And this matters for changelog and manifest reads.
2371 # compressed chunks. And this matters for changelog and manifest reads.
2372 t = data[0:1]
2372 t = data[0:1]
2373
2373
2374 if t == b'x':
2374 if t == b'x':
2375 try:
2375 try:
2376 return _zlibdecompress(data)
2376 return _zlibdecompress(data)
2377 except zlib.error as e:
2377 except zlib.error as e:
2378 raise error.RevlogError(
2378 raise error.RevlogError(
2379 _(b'revlog decompress error: %s')
2379 _(b'revlog decompress error: %s')
2380 % stringutil.forcebytestr(e)
2380 % stringutil.forcebytestr(e)
2381 )
2381 )
2382 # '\0' is more common than 'u' so it goes first.
2382 # '\0' is more common than 'u' so it goes first.
2383 elif t == b'\0':
2383 elif t == b'\0':
2384 return data
2384 return data
2385 elif t == b'u':
2385 elif t == b'u':
2386 return util.buffer(data, 1)
2386 return util.buffer(data, 1)
2387
2387
2388 compressor = self._get_decompressor(t)
2388 compressor = self._get_decompressor(t)
2389
2389
2390 return compressor.decompress(data)
2390 return compressor.decompress(data)
2391
2391
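# The header bytes routed above, summarized:
#
#     b'x'  -> zlib stream (zlib's first byte is 0x78, i.e. 'x')
#     b'\0' -> chunk stored raw, returned as-is
#     b'u'  -> literal text, header byte stripped off
#     else  -> first byte looked up among the registered compression
#              engines (zstd, for instance, announces itself this way)
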
2392 def _addrevision(
2392 def _addrevision(
2393 self,
2393 self,
2394 node,
2394 node,
2395 rawtext,
2395 rawtext,
2396 transaction,
2396 transaction,
2397 link,
2397 link,
2398 p1,
2398 p1,
2399 p2,
2399 p2,
2400 flags,
2400 flags,
2401 cachedelta,
2401 cachedelta,
2402 alwayscache=False,
2402 alwayscache=False,
2403 deltacomputer=None,
2403 deltacomputer=None,
2404 sidedata=None,
2404 sidedata=None,
2405 ):
2405 ):
2406 """internal function to add revisions to the log
2406 """internal function to add revisions to the log
2407
2407
2408 see addrevision for argument descriptions.
2408 see addrevision for argument descriptions.
2409
2409
2410 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2410 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2411
2411
2412 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2412 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2413 be used.
2413 be used.
2414
2414
2415 invariants:
2415 invariants:
2416 - rawtext is optional (can be None); if not set, cachedelta must be set.
2416 - rawtext is optional (can be None); if not set, cachedelta must be set.
2417 if both are set, they must correspond to each other.
2417 if both are set, they must correspond to each other.
2418 """
2418 """
2419 if node == self.nullid:
2419 if node == self.nullid:
2420 raise error.RevlogError(
2420 raise error.RevlogError(
2421 _(b"%s: attempt to add null revision") % self.display_id
2421 _(b"%s: attempt to add null revision") % self.display_id
2422 )
2422 )
2423 if (
2423 if (
2424 node == self.nodeconstants.wdirid
2424 node == self.nodeconstants.wdirid
2425 or node in self.nodeconstants.wdirfilenodeids
2425 or node in self.nodeconstants.wdirfilenodeids
2426 ):
2426 ):
2427 raise error.RevlogError(
2427 raise error.RevlogError(
2428 _(b"%s: attempt to add wdir revision") % self.display_id
2428 _(b"%s: attempt to add wdir revision") % self.display_id
2429 )
2429 )
2430 if self._writinghandles is None:
2430 if self._writinghandles is None:
2431 msg = b'adding revision outside `revlog._writing` context'
2431 msg = b'adding revision outside `revlog._writing` context'
2432 raise error.ProgrammingError(msg)
2432 raise error.ProgrammingError(msg)
2433
2433
2434 if self._inline:
2434 if self._inline:
2435 fh = self._writinghandles[0]
2435 fh = self._writinghandles[0]
2436 else:
2436 else:
2437 fh = self._writinghandles[1]
2437 fh = self._writinghandles[1]
2438
2438
2439 btext = [rawtext]
2439 btext = [rawtext]
2440
2440
2441 curr = len(self)
2441 curr = len(self)
2442 prev = curr - 1
2442 prev = curr - 1
2443
2443
2444 offset = self._get_data_offset(prev)
2444 offset = self._get_data_offset(prev)
2445
2445
2446 if self._concurrencychecker:
2446 if self._concurrencychecker:
2447 ifh, dfh, sdfh = self._writinghandles
2447 ifh, dfh, sdfh = self._writinghandles
2448 # XXX no checking for the sidedata file
2448 # XXX no checking for the sidedata file
2449 if self._inline:
2449 if self._inline:
2450 # offset is "as if" it were in the .d file, so we need to add on
2450 # offset is "as if" it were in the .d file, so we need to add on
2451 # the size of the entry metadata.
2451 # the size of the entry metadata.
2452 self._concurrencychecker(
2452 self._concurrencychecker(
2453 ifh, self._indexfile, offset + curr * self.index.entry_size
2453 ifh, self._indexfile, offset + curr * self.index.entry_size
2454 )
2454 )
2455 else:
2455 else:
2456 # Entries in the .i are a consistent size.
2456 # Entries in the .i are a consistent size.
2457 self._concurrencychecker(
2457 self._concurrencychecker(
2458 ifh, self._indexfile, curr * self.index.entry_size
2458 ifh, self._indexfile, curr * self.index.entry_size
2459 )
2459 )
2460 self._concurrencychecker(dfh, self._datafile, offset)
2460 self._concurrencychecker(dfh, self._datafile, offset)
2461
2461
2462 p1r, p2r = self.rev(p1), self.rev(p2)
2462 p1r, p2r = self.rev(p1), self.rev(p2)
2463
2463
2464 # full versions are inserted when the needed deltas
2464 # full versions are inserted when the needed deltas
2465 # become comparable to the uncompressed text
2465 # become comparable to the uncompressed text
2466 if rawtext is None:
2466 if rawtext is None:
2467 # we need the rawtext size before it is changed by flag processors,
2467 # we need the rawtext size before it is changed by flag processors,
2468 # i.e. the non-raw size. use revlog explicitly to avoid filelog's extra
2468 # i.e. the non-raw size. use revlog explicitly to avoid filelog's extra
2469 # logic that might remove metadata size.
2469 # logic that might remove metadata size.
2470 textlen = mdiff.patchedsize(
2470 textlen = mdiff.patchedsize(
2471 revlog.size(self, cachedelta[0]), cachedelta[1]
2471 revlog.size(self, cachedelta[0]), cachedelta[1]
2472 )
2472 )
2473 else:
2473 else:
2474 textlen = len(rawtext)
2474 textlen = len(rawtext)
2475
2475
2476 if deltacomputer is None:
2476 if deltacomputer is None:
2477 write_debug = None
2477 write_debug = None
2478 if self._debug_delta:
2478 if self._debug_delta:
2479 write_debug = transaction._report
2479 write_debug = transaction._report
2480 deltacomputer = deltautil.deltacomputer(
2480 deltacomputer = deltautil.deltacomputer(
2481 self, write_debug=write_debug
2481 self, write_debug=write_debug
2482 )
2482 )
2483
2483
2484 if cachedelta is not None and len(cachedelta) == 2:
2484 if cachedelta is not None and len(cachedelta) == 2:
2485 # If the cached delta has no information about how it should be
2485 # If the cached delta has no information about how it should be
2486 # reused, add the default reuse instruction according to the
2486 # reused, add the default reuse instruction according to the
2487 # revlog's configuration.
2487 # revlog's configuration.
2488 if self._generaldelta and self._lazydeltabase:
2488 if self._generaldelta and self._lazydeltabase:
2489 delta_base_reuse = DELTA_BASE_REUSE_TRY
2489 delta_base_reuse = DELTA_BASE_REUSE_TRY
2490 else:
2490 else:
2491 delta_base_reuse = DELTA_BASE_REUSE_NO
2491 delta_base_reuse = DELTA_BASE_REUSE_NO
2492 cachedelta = (cachedelta[0], cachedelta[1], delta_base_reuse)
2492 cachedelta = (cachedelta[0], cachedelta[1], delta_base_reuse)
2493
2493
2494 revinfo = revlogutils.revisioninfo(
2494 revinfo = revlogutils.revisioninfo(
2495 node,
2495 node,
2496 p1,
2496 p1,
2497 p2,
2497 p2,
2498 btext,
2498 btext,
2499 textlen,
2499 textlen,
2500 cachedelta,
2500 cachedelta,
2501 flags,
2501 flags,
2502 )
2502 )
2503
2503
2504 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2504 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2505
2505
2506 compression_mode = COMP_MODE_INLINE
2506 compression_mode = COMP_MODE_INLINE
2507 if self._docket is not None:
2507 if self._docket is not None:
2508 default_comp = self._docket.default_compression_header
2508 default_comp = self._docket.default_compression_header
2509 r = deltautil.delta_compression(default_comp, deltainfo)
2509 r = deltautil.delta_compression(default_comp, deltainfo)
2510 compression_mode, deltainfo = r
2510 compression_mode, deltainfo = r
2511
2511
2512 sidedata_compression_mode = COMP_MODE_INLINE
2512 sidedata_compression_mode = COMP_MODE_INLINE
2513 if sidedata and self.hassidedata:
2513 if sidedata and self.hassidedata:
2514 sidedata_compression_mode = COMP_MODE_PLAIN
2514 sidedata_compression_mode = COMP_MODE_PLAIN
2515 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
2515 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
2516 sidedata_offset = self._docket.sidedata_end
2516 sidedata_offset = self._docket.sidedata_end
2517 h, comp_sidedata = self.compress(serialized_sidedata)
2517 h, comp_sidedata = self.compress(serialized_sidedata)
2518 if (
2518 if (
2519 h != b'u'
2519 h != b'u'
2520 and comp_sidedata[0:1] != b'\0'
2520 and comp_sidedata[0:1] != b'\0'
2521 and len(comp_sidedata) < len(serialized_sidedata)
2521 and len(comp_sidedata) < len(serialized_sidedata)
2522 ):
2522 ):
2523 assert not h
2523 assert not h
2524 if (
2524 if (
2525 comp_sidedata[0:1]
2525 comp_sidedata[0:1]
2526 == self._docket.default_compression_header
2526 == self._docket.default_compression_header
2527 ):
2527 ):
2528 sidedata_compression_mode = COMP_MODE_DEFAULT
2528 sidedata_compression_mode = COMP_MODE_DEFAULT
2529 serialized_sidedata = comp_sidedata
2529 serialized_sidedata = comp_sidedata
2530 else:
2530 else:
2531 sidedata_compression_mode = COMP_MODE_INLINE
2531 sidedata_compression_mode = COMP_MODE_INLINE
2532 serialized_sidedata = comp_sidedata
2532 serialized_sidedata = comp_sidedata
2533 else:
2533 else:
2534 serialized_sidedata = b""
2534 serialized_sidedata = b""
2535 # Don't store the offset if the sidedata is empty; that way
2535 # Don't store the offset if the sidedata is empty; that way
2536 # we can easily detect empty sidedata, and they will be no different
2536 # we can easily detect empty sidedata, and they will be no different
2537 # from the ones we manually add.
2537 # from the ones we manually add.
2538 sidedata_offset = 0
2538 sidedata_offset = 0
2539
2539
2540 rank = RANK_UNKNOWN
2540 rank = RANK_UNKNOWN
2541 if self._compute_rank:
2541 if self._compute_rank:
2542 if (p1r, p2r) == (nullrev, nullrev):
2542 if (p1r, p2r) == (nullrev, nullrev):
2543 rank = 1
2543 rank = 1
2544 elif p1r != nullrev and p2r == nullrev:
2544 elif p1r != nullrev and p2r == nullrev:
2545 rank = 1 + self.fast_rank(p1r)
2545 rank = 1 + self.fast_rank(p1r)
2546 elif p1r == nullrev and p2r != nullrev:
2546 elif p1r == nullrev and p2r != nullrev:
2547 rank = 1 + self.fast_rank(p2r)
2547 rank = 1 + self.fast_rank(p2r)
2548 else: # merge node
2548 else: # merge node
2549 if rustdagop is not None and self.index.rust_ext_compat:
2549 if rustdagop is not None and self.index.rust_ext_compat:
2550 rank = rustdagop.rank(self.index, p1r, p2r)
2550 rank = rustdagop.rank(self.index, p1r, p2r)
2551 else:
2551 else:
2552 pmin, pmax = sorted((p1r, p2r))
2552 pmin, pmax = sorted((p1r, p2r))
2553 rank = 1 + self.fast_rank(pmax)
2553 rank = 1 + self.fast_rank(pmax)
2554 rank += sum(1 for _ in self.findmissingrevs([pmax], [pmin]))
2554 rank += sum(1 for _ in self.findmissingrevs([pmax], [pmin]))
2555
2555
2556 e = revlogutils.entry(
2556 e = revlogutils.entry(
2557 flags=flags,
2557 flags=flags,
2558 data_offset=offset,
2558 data_offset=offset,
2559 data_compressed_length=deltainfo.deltalen,
2559 data_compressed_length=deltainfo.deltalen,
2560 data_uncompressed_length=textlen,
2560 data_uncompressed_length=textlen,
2561 data_compression_mode=compression_mode,
2561 data_compression_mode=compression_mode,
2562 data_delta_base=deltainfo.base,
2562 data_delta_base=deltainfo.base,
2563 link_rev=link,
2563 link_rev=link,
2564 parent_rev_1=p1r,
2564 parent_rev_1=p1r,
2565 parent_rev_2=p2r,
2565 parent_rev_2=p2r,
2566 node_id=node,
2566 node_id=node,
2567 sidedata_offset=sidedata_offset,
2567 sidedata_offset=sidedata_offset,
2568 sidedata_compressed_length=len(serialized_sidedata),
2568 sidedata_compressed_length=len(serialized_sidedata),
2569 sidedata_compression_mode=sidedata_compression_mode,
2569 sidedata_compression_mode=sidedata_compression_mode,
2570 rank=rank,
2570 rank=rank,
2571 )
2571 )
2572
2572
2573 self.index.append(e)
2573 self.index.append(e)
2574 entry = self.index.entry_binary(curr)
2574 entry = self.index.entry_binary(curr)
2575 if curr == 0 and self._docket is None:
2575 if curr == 0 and self._docket is None:
2576 header = self._format_flags | self._format_version
2576 header = self._format_flags | self._format_version
2577 header = self.index.pack_header(header)
2577 header = self.index.pack_header(header)
2578 entry = header + entry
2578 entry = header + entry
2579 self._writeentry(
2579 self._writeentry(
2580 transaction,
2580 transaction,
2581 entry,
2581 entry,
2582 deltainfo.data,
2582 deltainfo.data,
2583 link,
2583 link,
2584 offset,
2584 offset,
2585 serialized_sidedata,
2585 serialized_sidedata,
2586 sidedata_offset,
2586 sidedata_offset,
2587 )
2587 )
2588
2588
2589 rawtext = btext[0]
2589 rawtext = btext[0]
2590
2590
2591 if alwayscache and rawtext is None:
2591 if alwayscache and rawtext is None:
2592 rawtext = deltacomputer.buildtext(revinfo, fh)
2592 rawtext = deltacomputer.buildtext(revinfo, fh)
2593
2593
2594 if type(rawtext) == bytes: # only accept immutable objects
2594 if type(rawtext) == bytes: # only accept immutable objects
2595 self._revisioncache = (node, curr, rawtext)
2595 self._revisioncache = (node, curr, rawtext)
2596 self._chainbasecache[curr] = deltainfo.chainbase
2596 self._chainbasecache[curr] = deltainfo.chainbase
2597 return curr
2597 return curr
2598
2598
2599 def _get_data_offset(self, prev):
2599 def _get_data_offset(self, prev):
2600 """Returns the current offset in the (in-transaction) data file.
2600 """Returns the current offset in the (in-transaction) data file.
2601 Versions < 2 of the revlog can get this in O(1); revlog v2 needs a docket
2601 Versions < 2 of the revlog can get this in O(1); revlog v2 needs a docket
2602 file to store that information: since sidedata can be rewritten to the
2602 file to store that information: since sidedata can be rewritten to the
2603 end of the data file within a transaction, you can have cases where, for
2603 end of the data file within a transaction, you can have cases where, for
2604 example, rev `n` does not have sidedata while rev `n - 1` does, leading
2604 example, rev `n` does not have sidedata while rev `n - 1` does, leading
2605 to `n - 1`'s sidedata being written after `n`'s data.
2605 to `n - 1`'s sidedata being written after `n`'s data.
2606
2606
2607 TODO cache this in a docket file before getting out of experimental."""
2607 TODO cache this in a docket file before getting out of experimental."""
2608 if self._docket is None:
2608 if self._docket is None:
2609 return self.end(prev)
2609 return self.end(prev)
2610 else:
2610 else:
2611 return self._docket.data_end
2611 return self._docket.data_end
2612
2612
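# Condensed, the rule above reads:
#
#     if self._docket is None:            # v0/v1: data is strictly append-only,
#         offset = self.end(prev)         # so the end of the previous rev is it
#     else:                               # v2: sidedata rewrites make the docket
#         offset = self._docket.data_end  # the only reliable source of truth
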
2613 def _writeentry(
2613 def _writeentry(
2614 self, transaction, entry, data, link, offset, sidedata, sidedata_offset
2614 self, transaction, entry, data, link, offset, sidedata, sidedata_offset
2615 ):
2615 ):
2616 # Files opened in a+ mode have inconsistent behavior on various
2616 # Files opened in a+ mode have inconsistent behavior on various
2617 # platforms. Windows requires that a file positioning call be made
2617 # platforms. Windows requires that a file positioning call be made
2618 # when the file handle transitions between reads and writes. See
2618 # when the file handle transitions between reads and writes. See
2619 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2619 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2620 # platforms, Python or the platform itself can be buggy. Some versions
2620 # platforms, Python or the platform itself can be buggy. Some versions
2621 # of Solaris have been observed to not append at the end of the file
2621 # of Solaris have been observed to not append at the end of the file
2622 # if the file was seeked to before the end. See issue4943 for more.
2622 # if the file was seeked to before the end. See issue4943 for more.
2623 #
2623 #
2624 # We work around this issue by inserting a seek() before writing.
2624 # We work around this issue by inserting a seek() before writing.
2625 # Note: This is likely not necessary on Python 3. However, because
2625 # Note: This is likely not necessary on Python 3. However, because
2626 # the file handle is reused for reads and may be seeked there, we need
2626 # the file handle is reused for reads and may be seeked there, we need
2627 # to be careful before changing this.
2627 # to be careful before changing this.
2628 if self._writinghandles is None:
2628 if self._writinghandles is None:
2629 msg = b'adding revision outside `revlog._writing` context'
2629 msg = b'adding revision outside `revlog._writing` context'
2630 raise error.ProgrammingError(msg)
2630 raise error.ProgrammingError(msg)
2631 ifh, dfh, sdfh = self._writinghandles
2631 ifh, dfh, sdfh = self._writinghandles
2632 if self._docket is None:
2632 if self._docket is None:
2633 ifh.seek(0, os.SEEK_END)
2633 ifh.seek(0, os.SEEK_END)
2634 else:
2634 else:
2635 ifh.seek(self._docket.index_end, os.SEEK_SET)
2635 ifh.seek(self._docket.index_end, os.SEEK_SET)
2636 if dfh:
2636 if dfh:
2637 if self._docket is None:
2637 if self._docket is None:
2638 dfh.seek(0, os.SEEK_END)
2638 dfh.seek(0, os.SEEK_END)
2639 else:
2639 else:
2640 dfh.seek(self._docket.data_end, os.SEEK_SET)
2640 dfh.seek(self._docket.data_end, os.SEEK_SET)
2641 if sdfh:
2641 if sdfh:
2642 sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)
2642 sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)
2643
2643
2644 curr = len(self) - 1
2644 curr = len(self) - 1
2645 if not self._inline:
2645 if not self._inline:
2646 transaction.add(self._datafile, offset)
2646 transaction.add(self._datafile, offset)
2647 if self._sidedatafile:
2647 if self._sidedatafile:
2648 transaction.add(self._sidedatafile, sidedata_offset)
2648 transaction.add(self._sidedatafile, sidedata_offset)
2649 transaction.add(self._indexfile, curr * len(entry))
2649 transaction.add(self._indexfile, curr * len(entry))
2650 if data[0]:
2650 if data[0]:
2651 dfh.write(data[0])
2651 dfh.write(data[0])
2652 dfh.write(data[1])
2652 dfh.write(data[1])
2653 if sidedata:
2653 if sidedata:
2654 sdfh.write(sidedata)
2654 sdfh.write(sidedata)
2655 ifh.write(entry)
2655 ifh.write(entry)
2656 else:
2656 else:
2657 offset += curr * self.index.entry_size
2657 offset += curr * self.index.entry_size
2658 transaction.add(self._indexfile, offset)
2658 transaction.add(self._indexfile, offset)
2659 ifh.write(entry)
2659 ifh.write(entry)
2660 ifh.write(data[0])
2660 ifh.write(data[0])
2661 ifh.write(data[1])
2661 ifh.write(data[1])
2662 assert not sidedata
2662 assert not sidedata
2663 self._enforceinlinesize(transaction)
2663 self._enforceinlinesize(transaction)
2664 if self._docket is not None:
2664 if self._docket is not None:
2665 # revlog-v2 always has 3 writing handles, help Pytype
2665 # revlog-v2 always has 3 writing handles, help Pytype
2666 wh1 = self._writinghandles[0]
2666 wh1 = self._writinghandles[0]
2667 wh2 = self._writinghandles[1]
2667 wh2 = self._writinghandles[1]
2668 wh3 = self._writinghandles[2]
2668 wh3 = self._writinghandles[2]
2669 assert wh1 is not None
2669 assert wh1 is not None
2670 assert wh2 is not None
2670 assert wh2 is not None
2671 assert wh3 is not None
2671 assert wh3 is not None
2672 self._docket.index_end = wh1.tell()
2672 self._docket.index_end = wh1.tell()
2673 self._docket.data_end = wh2.tell()
2673 self._docket.data_end = wh2.tell()
2674 self._docket.sidedata_end = wh3.tell()
2674 self._docket.sidedata_end = wh3.tell()
2675
2675
2676 nodemaputil.setup_persistent_nodemap(transaction, self)
2676 nodemaputil.setup_persistent_nodemap(transaction, self)
2677
2677
2678 def addgroup(
2678 def addgroup(
2679 self,
2679 self,
2680 deltas,
2680 deltas,
2681 linkmapper,
2681 linkmapper,
2682 transaction,
2682 transaction,
2683 alwayscache=False,
2683 alwayscache=False,
2684 addrevisioncb=None,
2684 addrevisioncb=None,
2685 duplicaterevisioncb=None,
2685 duplicaterevisioncb=None,
2686 debug_info=None,
2686 debug_info=None,
2687 delta_base_reuse_policy=None,
2687 delta_base_reuse_policy=None,
2688 ):
2688 ):
2689 """
2689 """
2690 add a delta group
2690 add a delta group
2691
2691
2692 given a set of deltas, add them to the revision log. the
2692 given a set of deltas, add them to the revision log. the
2693 first delta is against its parent, which should be in our
2693 first delta is against its parent, which should be in our
2694 log; the rest are against the previous delta.
2694 log; the rest are against the previous delta.
2695
2695
2696 If ``addrevisioncb`` is defined, it will be called with arguments of
2696 If ``addrevisioncb`` is defined, it will be called with arguments of
2697 this revlog and the node that was added.
2697 this revlog and the node that was added.
2698 """
2698 """
2699
2699
2700 if self._adding_group:
2700 if self._adding_group:
2701 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2701 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2702
2702
2703 # read the default delta-base reuse policy from revlog config if the
2703 # read the default delta-base reuse policy from revlog config if the
2704 # group did not specify one.
2704 # group did not specify one.
2705 if delta_base_reuse_policy is None:
2705 if delta_base_reuse_policy is None:
2706 if self._generaldelta and self._lazydeltabase:
2706 if self._generaldelta and self._lazydeltabase:
2707 delta_base_reuse_policy = DELTA_BASE_REUSE_TRY
2707 delta_base_reuse_policy = DELTA_BASE_REUSE_TRY
2708 else:
2708 else:
2709 delta_base_reuse_policy = DELTA_BASE_REUSE_NO
2709 delta_base_reuse_policy = DELTA_BASE_REUSE_NO
2710
2710
2711 self._adding_group = True
2711 self._adding_group = True
2712 empty = True
2712 empty = True
2713 try:
2713 try:
2714 with self._writing(transaction):
2714 with self._writing(transaction):
2715 write_debug = None
2715 write_debug = None
2716 if self._debug_delta:
2716 if self._debug_delta:
2717 write_debug = transaction._report
2717 write_debug = transaction._report
2718 deltacomputer = deltautil.deltacomputer(
2718 deltacomputer = deltautil.deltacomputer(
2719 self,
2719 self,
2720 write_debug=write_debug,
2720 write_debug=write_debug,
2721 debug_info=debug_info,
2721 debug_info=debug_info,
2722 )
2722 )
2723 # loop through our set of deltas
2723 # loop through our set of deltas
2724 for data in deltas:
2724 for data in deltas:
2725 (
2725 (
2726 node,
2726 node,
2727 p1,
2727 p1,
2728 p2,
2728 p2,
2729 linknode,
2729 linknode,
2730 deltabase,
2730 deltabase,
2731 delta,
2731 delta,
2732 flags,
2732 flags,
2733 sidedata,
2733 sidedata,
2734 ) = data
2734 ) = data
2735 link = linkmapper(linknode)
2735 link = linkmapper(linknode)
2736 flags = flags or REVIDX_DEFAULT_FLAGS
2736 flags = flags or REVIDX_DEFAULT_FLAGS
2737
2737
2738 rev = self.index.get_rev(node)
2738 rev = self.index.get_rev(node)
2739 if rev is not None:
2739 if rev is not None:
2740 # this can happen if two branches make the same change
2740 # this can happen if two branches make the same change
2741 self._nodeduplicatecallback(transaction, rev)
2741 self._nodeduplicatecallback(transaction, rev)
2742 if duplicaterevisioncb:
2742 if duplicaterevisioncb:
2743 duplicaterevisioncb(self, rev)
2743 duplicaterevisioncb(self, rev)
2744 empty = False
2744 empty = False
2745 continue
2745 continue
2746
2746
2747 for p in (p1, p2):
2747 for p in (p1, p2):
2748 if not self.index.has_node(p):
2748 if not self.index.has_node(p):
2749 raise error.LookupError(
2749 raise error.LookupError(
2750 p, self.radix, _(b'unknown parent')
2750 p, self.radix, _(b'unknown parent')
2751 )
2751 )
2752
2752
2753 if not self.index.has_node(deltabase):
2753 if not self.index.has_node(deltabase):
2754 raise error.LookupError(
2754 raise error.LookupError(
2755 deltabase, self.display_id, _(b'unknown delta base')
2755 deltabase, self.display_id, _(b'unknown delta base')
2756 )
2756 )
2757
2757
2758 baserev = self.rev(deltabase)
2758 baserev = self.rev(deltabase)
2759
2759
2760 if baserev != nullrev and self.iscensored(baserev):
2760 if baserev != nullrev and self.iscensored(baserev):
2761 # if base is censored, delta must be full replacement in a
2761 # if base is censored, delta must be full replacement in a
2762 # single patch operation
2762 # single patch operation
2763 hlen = struct.calcsize(b">lll")
2763 hlen = struct.calcsize(b">lll")
2764 oldlen = self.rawsize(baserev)
2764 oldlen = self.rawsize(baserev)
2765 newlen = len(delta) - hlen
2765 newlen = len(delta) - hlen
2766 if delta[:hlen] != mdiff.replacediffheader(
2766 if delta[:hlen] != mdiff.replacediffheader(
2767 oldlen, newlen
2767 oldlen, newlen
2768 ):
2768 ):
2769 raise error.CensoredBaseError(
2769 raise error.CensoredBaseError(
2770 self.display_id, self.node(baserev)
2770 self.display_id, self.node(baserev)
2771 )
2771 )
2772
2772
2773 if not flags and self._peek_iscensored(baserev, delta):
2773 if not flags and self._peek_iscensored(baserev, delta):
2774 flags |= REVIDX_ISCENSORED
2774 flags |= REVIDX_ISCENSORED
2775
2775
2776 # We assume consumers of addrevisioncb will want to retrieve
2776 # We assume consumers of addrevisioncb will want to retrieve
2777 # the added revision, which will require a call to
2777 # the added revision, which will require a call to
2778 # revision(). revision() will fast path if there is a cache
2778 # revision(). revision() will fast path if there is a cache
2779 # hit. So, we tell _addrevision() to always cache in this case.
2779 # hit. So, we tell _addrevision() to always cache in this case.
2780 # We're only using addgroup() in the context of changegroup
2780 # We're only using addgroup() in the context of changegroup
2781 # generation so the revision data can always be handled as raw
2781 # generation so the revision data can always be handled as raw
2782 # by the flagprocessor.
2782 # by the flagprocessor.
2783 rev = self._addrevision(
2783 rev = self._addrevision(
2784 node,
2784 node,
2785 None,
2785 None,
2786 transaction,
2786 transaction,
2787 link,
2787 link,
2788 p1,
2788 p1,
2789 p2,
2789 p2,
2790 flags,
2790 flags,
2791 (baserev, delta, delta_base_reuse_policy),
2791 (baserev, delta, delta_base_reuse_policy),
2792 alwayscache=alwayscache,
2792 alwayscache=alwayscache,
2793 deltacomputer=deltacomputer,
2793 deltacomputer=deltacomputer,
2794 sidedata=sidedata,
2794 sidedata=sidedata,
2795 )
2795 )
2796
2796
2797 if addrevisioncb:
2797 if addrevisioncb:
2798 addrevisioncb(self, rev)
2798 addrevisioncb(self, rev)
2799 empty = False
2799 empty = False
2800 finally:
2800 finally:
2801 self._adding_group = False
2801 self._adding_group = False
2802 return not empty
2802 return not empty
2803
2803
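    # Illustrative sketch (not part of the original file): each item consumed
    # by addgroup() above is an 8-tuple. A hypothetical caller, assuming
    # placeholder ``rl`` (this revlog), ``cl`` (a changelog that already
    # contains every ``linknode``) and ``tr`` (an open transaction) objects,
    # could look like:
    #
    #   deltas = [
    #       # (node, p1, p2, linknode, deltabase, delta, flags, sidedata)
    #       (node, p1, p2, linknode, deltabase, delta, 0, {}),
    #   ]
    #   added = rl.addgroup(
    #       deltas,
    #       cl.rev,  # linkmapper: linknode -> linkrev
    #       tr,
    #   )
    #   # ``added`` is True unless every incoming node was a duplicate.
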
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(
            minlink,
            len(self) - 1,
            self.headrevs(),
            self.linkrev,
            self.parentrevs,
        )

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        data_end = self.start(rev)
        if not self._inline:
            transaction.add(self._datafile, data_end)
            end = rev * self.index.entry_size
        else:
            end = data_end + (rev * self.index.entry_size)

        if self._sidedatafile:
            sidedata_end = self.sidedata_cut_off(rev)
            transaction.add(self._sidedatafile, sidedata_end)

        transaction.add(self._indexfile, end)
        if self._docket is not None:
            # XXX we could leverage the docket while stripping. However it is
            # not powerful enough at the time of this comment.
            self._docket.index_end = end
            self._docket.data_end = data_end
            self._docket.sidedata_end = sidedata_end
            self._docket.write(transaction, stripping=True)

        # then reset internal state in memory to forget those revisions
        self._revisioncache = None
        self._chaininfocache = util.lrucachedict(500)
        self._segmentfile.clear_cache()
        self._segmentfile_sidedata.clear_cache()

        del self.index[rev:-1]

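    # Worked example (illustrative, not part of the original file): for an
    # inline v1 revlog with 64-byte index entries, stripping at rev == 3 when
    # self.start(3) == 1000 gives
    #
    #   end = 1000 + 3 * 64 == 1192
    #
    # i.e. the interleaved ``.i`` file keeps three entries plus their data.
    # For a split revlog the index is instead truncated at 3 * 64 bytes and
    # the data file at 1000 bytes.
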
    def checksize(self):
        """Check size of index and data files

        return a (dd, di) tuple.
        - dd: extra bytes for the "data" file
        - di: extra bytes for the "index" file

        A healthy revlog will return (0, 0).
        """
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, io.SEEK_END)
                actual = f.tell()
            dd = actual - expected
        except FileNotFoundError:
            dd = 0

        try:
            f = self.opener(self._indexfile)
            f.seek(0, io.SEEK_END)
            actual = f.tell()
            f.close()
            s = self.index.entry_size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except FileNotFoundError:
            di = 0

        return (dd, di)

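    # Illustrative usage (not part of the original file): callers only need
    # to compare the result against (0, 0), e.g.
    #
    #   dd, di = rl.checksize()
    #   if dd or di:
    #       ui.warn(b'revlog %s is damaged\n' % rl.display_id)
    #
    # where ``rl`` and ``ui`` are hypothetical revlog and ui objects; a
    # non-zero value means the on-disk file size disagrees with what the
    # index accounts for.
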
    def files(self):
        res = [self._indexfile]
        if self._docket_file is None:
            if not self._inline:
                res.append(self._datafile)
        else:
            res.append(self._docket_file)
            res.extend(self._docket.old_index_filepaths(include_empty=False))
            if self._docket.data_end:
                res.append(self._datafile)
            res.extend(self._docket.old_data_filepaths(include_empty=False))
            if self._docket.sidedata_end:
                res.append(self._sidedatafile)
            res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
        return res

    def emitrevisions(
        self,
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltamode=repository.CG_DELTAMODE_STD,
        sidedata_helpers=None,
        debug_info=None,
    ):
        if nodesorder not in (b'nodes', b'storage', b'linear', None):
            raise error.ProgrammingError(
                b'unhandled value for nodesorder: %s' % nodesorder
            )

        if nodesorder is None and not self._generaldelta:
            nodesorder = b'storage'

        if (
            not self._storedeltachains
            and deltamode != repository.CG_DELTAMODE_PREV
        ):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self,
            nodes,
            nodesorder,
            revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self.candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
            sidedata_helpers=sidedata_helpers,
            debug_info=debug_info,
        )

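    # Illustrative sketch (not part of the original file): a hypothetical
    # consumer, given a revlog ``rl`` and an iterable of ``nodes``:
    #
    #   for d in rl.emitrevisions(nodes, revisiondata=True):
    #       # each item is a ``revlogrevisiondelta`` carrying the node, its
    #       # parents, and either a delta against ``d.basenode`` or a full
    #       # revision text
    #       process(d.node, d.basenode, d.delta or d.revision)
    #
    # ``process`` is a placeholder for whatever the caller does with the
    # emitted data.
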
    DELTAREUSEALWAYS = b'always'
    DELTAREUSESAMEREVS = b'samerevs'
    DELTAREUSENEVER = b'never'

    DELTAREUSEFULLADD = b'fulladd'

    DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}

    def clone(
        self,
        tr,
        destrevlog,
        addrevisioncb=None,
        deltareuse=DELTAREUSESAMEREVS,
        forcedeltabothparents=None,
        sidedata_helpers=None,
    ):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta
        encoding differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is
          the fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).
        DELTAREUSEFULLADD
          Revisions will be re-added as if they were new content. This is
          slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
          e.g. large file detection and handling.

        Delta computation can be slow, so the choice of delta reuse policy
        can significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if
        the destination revlog would choose a better delta base, the delta is
        recomputed. This means if you are converting a non-generaldelta
        revlog to a generaldelta revlog, deltas will be recomputed if the
        delta's parent isn't a parent of the revision.

        In addition to the delta policy, the ``forcedeltabothparents``
        argument controls whether to force compute deltas against both
        parents for merges. If unset, the destination revlog's current
        setting is used.

        See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
        `sidedata_helpers`.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(
                _(b'value for deltareuse invalid: %s') % deltareuse
            )

        if len(destrevlog):
            raise ValueError(_(b'destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_(b'source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_(b'destination revlog has filtered revisions'))

        # lazydelta and lazydeltabase control whether to reuse a cached delta,
        # if possible.
        oldlazydelta = destrevlog._lazydelta
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSENEVER:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = False

            destrevlog._deltabothparents = forcedeltabothparents or oldamd

            self._clone(
                tr,
                destrevlog,
                addrevisioncb,
                deltareuse,
                forcedeltabothparents,
                sidedata_helpers,
            )

        finally:
            destrevlog._lazydelta = oldlazydelta
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd

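    # Illustrative usage (not part of the original file), assuming an open
    # transaction ``tr`` and an empty destination revlog ``dest`` created
    # elsewhere:
    #
    #   src.clone(tr, dest, deltareuse=src.DELTAREUSESAMEREVS)
    #
    # Picking DELTAREUSEALWAYS instead would copy deltas wholesale (fastest),
    # while DELTAREUSENEVER would recompute every delta (slowest).
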
    def _clone(
        self,
        tr,
        destrevlog,
        addrevisioncb,
        deltareuse,
        forcedeltabothparents,
        sidedata_helpers,
    ):
        """perform the core duty of `revlog.clone` after parameter processing"""
        write_debug = None
        if self._debug_delta:
            write_debug = tr._report
        deltacomputer = deltautil.deltacomputer(
            destrevlog,
            write_debug=write_debug,
        )
        index = self.index
        for rev in self:
            entry = index[rev]

            # Some classes override linkrev to take filtered revs into
            # account. Use raw entry from index.
            flags = entry[0] & 0xFFFF
            linkrev = entry[4]
            p1 = index[entry[5]][7]
            p2 = index[entry[6]][7]
            node = entry[7]

            # (Possibly) reuse the delta from the revlog if allowed and
            # the revlog chunk is a delta.
            cachedelta = None
            rawtext = None
            if deltareuse == self.DELTAREUSEFULLADD:
                text = self._revisiondata(rev)
                sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                destrevlog.addrevision(
                    text,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    cachedelta=cachedelta,
                    node=node,
                    flags=flags,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )
            else:
                if destrevlog._lazydelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                sidedata = None
                if not cachedelta:
                    rawtext = self._revisiondata(rev)
                    sidedata = self.sidedata(rev)
                if sidedata is None:
                    sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                with destrevlog._writing(tr):
                    destrevlog._addrevision(
                        node,
                        rawtext,
                        tr,
                        linkrev,
                        p1,
                        p2,
                        flags,
                        cachedelta,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

            if addrevisioncb:
                addrevisioncb(self, rev, node)

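    # Note (illustrative, not part of the original file): the ``entry[...]``
    # indices used above follow the index tuple layout, which for the fields
    # read here is assumed to be:
    #
    #   entry[0]  offset (upper bits) and flags (lower 16 bits)
    #   entry[4]  link revision
    #   entry[5]  first parent revision
    #   entry[6]  second parent revision
    #   entry[7]  node id
    #
    # hence ``entry[0] & 0xFFFF`` extracts the flags and
    # ``index[entry[5]][7]`` resolves a parent revision to its node.
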
    def censorrevision(self, tr, censornode, tombstone=b''):
        if self._format_version == REVLOGV0:
            raise error.RevlogError(
                _(b'cannot censor with version %d revlogs')
                % self._format_version
            )
        elif self._format_version == REVLOGV1:
            rewrite.v1_censor(self, tr, censornode, tombstone)
        else:
            rewrite.v2_censor(self, tr, censornode, tombstone)

    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_(b'index contains %d extra bytes') % di)

        version = self._format_version

        # The verifier tells us what version revlog we should be.
        if version != state[b'expectedversion']:
            yield revlogproblem(
                warning=_(b"warning: '%s' uses revlog format %d; expected %d")
                % (self.display_id, version, state[b'expectedversion'])
            )

        state[b'skipread'] = set()
        state[b'safe_renamed'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta:   file content starts with b'\1\n', the metadata
            #           header defined in filelog.py, but without a rename
            #   ext:    content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  ------------------------------------------------------
            #  flags()              | 0      | 0      | 0     | not 0
            #  renamed()            | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n' | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "rawdata(rev)". "text" mentioned below
            # is "revision(rev)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see flagutil.addflagprocessor).
            #
            #              | common | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1     | L1     | L1    | L1
            # size()       | L1     | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2     | L2     | L2    | L2
            # len(text)    | L2     | L2     | L2    | L3
            # len(read())  | L2     | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            # 1. length check: L1 == L2, in all cases.
            # 2. hash check: depending on flag processor, we may need to
            #    use either "text" (external), or "rawtext" (in revlog).

            try:
                skipflags = state.get(b'skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                _verify_revision(self, skipflags, state, node)

                l1 = self.rawsize(rev)
                l2 = len(self.rawdata(node))

                if l1 != l2:
                    yield revlogproblem(
                        error=_(b'unpacked size is %d, %d expected') % (l2, l1),
                        node=node,
                    )

            except error.CensoredNodeError:
                if state[b'erroroncensored']:
                    yield revlogproblem(
                        error=_(b'censored file data'), node=node
                    )
                state[b'skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_(b'unpacking %s: %s')
                    % (short(node), stringutil.forcebytestr(e)),
                    node=node,
                )
                state[b'skipread'].add(node)

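    # Illustrative sketch (not part of the original file): verify-style
    # callers drive this generator roughly as follows, with hypothetical
    # ``rl``, ``state`` and ``ui`` objects:
    #
    #   state = {b'expectedversion': 1, b'erroroncensored': True}
    #   for problem in rl.verifyintegrity(state):
    #       if problem.error:
    #           ui.warn(problem.error + b'\n')
    #       elif problem.warning:
    #           ui.warn(problem.warning + b'\n')
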
    def storageinfo(
        self,
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
        d = {}

        if exclusivefiles:
            d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
            if not self._inline:
                d[b'exclusivefiles'].append((self.opener, self._datafile))

        if sharedfiles:
            d[b'sharedfiles'] = []

        if revisionscount:
            d[b'revisionscount'] = len(self)

        if trackedsize:
            d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d[b'storedsize'] = sum(
                self.opener.stat(path).st_size for path in self.files()
            )

        return d

    def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
        if not self.hassidedata:
            return
        # revlog formats with sidedata support do not support inline data
        assert not self._inline
        if not helpers[1] and not helpers[2]:
            # Nothing to generate or remove
            return

        new_entries = []
        # append the new sidedata
        with self._writing(transaction):
            ifh, dfh, sdfh = self._writinghandles
            dfh.seek(self._docket.sidedata_end, os.SEEK_SET)

            current_offset = sdfh.tell()
            for rev in range(startrev, endrev + 1):
                entry = self.index[rev]
                new_sidedata, flags = sidedatautil.run_sidedata_helpers(
                    store=self,
                    sidedata_helpers=helpers,
                    sidedata={},
                    rev=rev,
                )

                serialized_sidedata = sidedatautil.serialize_sidedata(
                    new_sidedata
                )

                sidedata_compression_mode = COMP_MODE_INLINE
                if serialized_sidedata and self.hassidedata:
                    sidedata_compression_mode = COMP_MODE_PLAIN
                    h, comp_sidedata = self.compress(serialized_sidedata)
                    if (
                        h != b'u'
                        and comp_sidedata[0:1] != b'\0'
                        and len(comp_sidedata) < len(serialized_sidedata)
                    ):
                        assert not h
                        if (
                            comp_sidedata[0:1]
                            == self._docket.default_compression_header
                        ):
                            sidedata_compression_mode = COMP_MODE_DEFAULT
                            serialized_sidedata = comp_sidedata
                        else:
                            sidedata_compression_mode = COMP_MODE_INLINE
                            serialized_sidedata = comp_sidedata
                if entry[8] != 0 or entry[9] != 0:
                    # rewriting entries that already have sidedata is not
                    # supported yet, because it introduces garbage data in the
                    # revlog.
                    msg = b"rewriting existing sidedata is not supported yet"
                    raise error.Abort(msg)

                # Apply (potential) flags to add and to remove after running
                # the sidedata helpers
                new_offset_flags = entry[0] | flags[0] & ~flags[1]
                entry_update = (
                    current_offset,
                    len(serialized_sidedata),
                    new_offset_flags,
                    sidedata_compression_mode,
                )

                # the sidedata computation might have moved the file cursors
                # around
                sdfh.seek(current_offset, os.SEEK_SET)
                sdfh.write(serialized_sidedata)
                new_entries.append(entry_update)
                current_offset += len(serialized_sidedata)
            self._docket.sidedata_end = sdfh.tell()

            # rewrite the new index entries
            ifh.seek(startrev * self.index.entry_size)
            for i, e in enumerate(new_entries):
                rev = startrev + i
                self.index.replace_sidedata_info(rev, *e)
                packed = self.index.entry_binary(rev)
                if rev == 0 and self._docket is None:
                    header = self._format_flags | self._format_version
                    header = self.index.pack_header(header)
                    packed = header + packed
                ifh.write(packed)
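
    # Note (illustrative, not part of the original file): the three
    # compression modes used above are assumed to trade per-chunk header
    # bytes for flexibility roughly as follows:
    #
    #   COMP_MODE_PLAIN    chunk stored uncompressed, no header byte
    #   COMP_MODE_DEFAULT  chunk compressed with the docket's default engine,
    #                      so the header byte can be omitted from storage
    #   COMP_MODE_INLINE   first byte of the stored chunk identifies the
    #                      compression engine
    #
    # which is why the code only falls back to COMP_MODE_INLINE when the
    # compressed payload does not start with the docket's default header.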