revlog: use the new `entry` function in revlog.py...
marmoute
r48188:8230f020 default
@@ -1,3400 +1,3401 b''
# revlog.py - storage back-end for mercurial
# coding: utf8
#
# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import binascii
import collections
import contextlib
import errno
import io
import os
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullrev,
    sha1nodeconstants,
    short,
    wdirrev,
)
from .i18n import _
from .pycompat import getattr
from .revlogutils.constants import (
    ALL_KINDS,
    CHANGELOGV2,
    COMP_MODE_DEFAULT,
    COMP_MODE_INLINE,
    COMP_MODE_PLAIN,
    FEATURES_BY_VERSION,
    FLAG_GENERALDELTA,
    FLAG_INLINE_DATA,
    INDEX_HEADER,
    KIND_CHANGELOG,
    REVLOGV0,
    REVLOGV1,
    REVLOGV1_FLAGS,
    REVLOGV2,
    REVLOGV2_FLAGS,
    REVLOG_DEFAULT_FLAGS,
    REVLOG_DEFAULT_FORMAT,
    REVLOG_DEFAULT_VERSION,
    SUPPORTED_FLAGS,
)
from .revlogutils.flagutil import (
    REVIDX_DEFAULT_FLAGS,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_FLAGS_ORDER,
    REVIDX_HASCOPIESINFO,
    REVIDX_ISCENSORED,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
)
from .thirdparty import attr
from . import (
    ancestor,
    dagop,
    error,
    mdiff,
    policy,
    pycompat,
    revlogutils,
    templatefilters,
    util,
)
from .interfaces import (
    repository,
    util as interfaceutil,
)
from .revlogutils import (
    censor,
    deltas as deltautil,
    docket as docketutil,
    flagutil,
    nodemap as nodemaputil,
    revlogv0,
    sidedata as sidedatautil,
)
from .utils import (
    storageutil,
    stringutil,
)

# blanked usage of all the names to prevent pyflakes constraints
# We need these names available in the module for extensions.

REVLOGV0
REVLOGV1
REVLOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_HASCOPIESINFO
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_RAWTEXT_CHANGING_FLAGS

parsers = policy.importmod('parsers')
rustancestor = policy.importrust('ancestor')
rustdagop = policy.importrust('dagop')
rustrevlog = policy.importrust('revlog')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    return text, False


def ellipsiswriteprocessor(rl, text):
    return text, False


def ellipsisrawprocessor(rl, text):
    return False


ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)


def _verify_revision(rl, skipflags, state, node):
    """Verify the integrity of the given revlog ``node`` while providing a hook
    point for extensions to influence the operation."""
    if skipflags:
        state[b'skipread'].add(node)
    else:
        # Side-effect: read content and verify hash.
        rl.revision(node)


# True if a fast implementation for persistent-nodemap is available
#
# We also consider the "pure" python implementation "fast" enough, because
# people using pure don't really have performance considerations (and a
# wheelbarrow of other slowness sources)
HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
    parsers, 'BaseIndexObject'
)


@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Information about a revision that allows building its fulltext
    node: expected hash of the revision
    p1, p2: parent revs of the revision
    btext: built text cache consisting of a one-element list
    cachedelta: (baserev, uncompressed_delta) or None
    flags: flags associated with the revision storage

    One of btext[0] or cachedelta must be set.
    """

    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()
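
    # A hypothetical instantiation (illustrative values only), carrying a
    # cached delta against revision 10 rather than a prebuilt fulltext,
    # which satisfies the "one of btext[0] or cachedelta" invariant above:
    #
    #     _revisioninfo(node=expected_node, p1=10, p2=nullrev,
    #                   btext=[None], textlen=len(fulltext),
    #                   cachedelta=(10, delta), flags=0)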


@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta(object):
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    sidedata = attr.ib()
    protocol_flags = attr.ib()
    linknode = attr.ib(default=None)


@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem(object):
    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)


def parse_index_v1(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline)
    return index, cache


def parse_index_v2(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline, revlogv2=True)
    return index, cache


def parse_index_cl_v2(data, inline):
    # call the C implementation to parse the index data
    assert not inline
    from .pure.parsers import parse_index_cl_v2

    index, cache = parse_index_cl_v2(data)
    return index, cache


if util.safehasattr(parsers, 'parse_index_devel_nodemap'):

    def parse_index_v1_nodemap(data, inline):
        index, cache = parsers.parse_index_devel_nodemap(data, inline)
        return index, cache


else:
    parse_index_v1_nodemap = None


def parse_index_v1_mixed(data, inline):
    index, cache = parse_index_v1(data, inline)
    return rustrevlog.MixedIndex(index), cache


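# A minimal usage sketch for the parser helpers above (illustrative names
# and path; an on-disk index is normally loaded through the revlog class
# below rather than called like this):
#
#     data = open(b'store/00changelog.i', 'rb').read()
#     index, cache = parse_index_v1(data, inline=False)
#     entry = index[0]  # per-revision tuple, see revlogutils/constants.py

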
# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7FFFFFFF

PARTIAL_READ_MSG = _(
    b'partial read of revlog %s; expected %d bytes from offset %d, got %d'
)

FILE_TOO_SHORT_MSG = _(
    b'cannot read from revlog %s;'
    b' expected %d bytes from offset %d, data size is %d'
)


class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.

    If `upperboundcomp` is not None, this is the expected maximal gain from
    compression for the data content.

    `concurrencychecker` is an optional function that receives 3 arguments: a
    file handle, a filename, and an expected position. It should check whether
    the current position in the file handle is valid, and log/warn/fail (by
    raising).

    See mercurial/revlogutils/constants.py for details about the content of an
    index entry.
    """

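    # To make the "about 2 times the length" bound from the docstring
    # concrete (illustrative numbers only): for a revision whose fulltext
    # is 1 MB, the run of deltas needed to reconstruct it is capped around
    # 2 MB of data to read and apply before a new base is stored, so
    # rebuilding any revision stays proportional to its own size rather
    # than to the number of revisions in the revlog.
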
    _flagserrorclass = error.RevlogError

    def __init__(
        self,
        opener,
        target,
        radix,
        postfix=None,  # only exists for `tmpcensored` now
        checkambig=False,
        mmaplargeindex=False,
        censorable=False,
        upperboundcomp=None,
        persistentnodemap=False,
        concurrencychecker=None,
        trypending=False,
    ):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.

        `target`: a (KIND, ID) tuple that identifies the content stored in
        this revlog. It helps the rest of the code to understand what the
        revlog is about without having to resort to heuristics and
        index-filename analysis. Note that this must be reliably set by
        normal code, but test, debug, or performance-measurement code might
        not set it to an accurate value.
        """
        self.upperboundcomp = upperboundcomp

        self.radix = radix

        self._docket_file = None
        self._indexfile = None
        self._datafile = None
        self._sidedatafile = None
        self._nodemap_file = None
        self.postfix = postfix
        self._trypending = trypending
        self.opener = opener
        if persistentnodemap:
            self._nodemap_file = nodemaputil.get_nodemap_file(self)

        assert target[0] in ALL_KINDS
        assert len(target) == 2
        self.target = target
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        self._mmaplargeindex = mmaplargeindex
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._revisioncache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, b'')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._deltabothparents = True
        self.index = None
        self._docket = None
        self._nodemap_docket = None
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._compengine = b'zlib'
        self._compengineopts = {}
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._sparserevlog = False
        self.hassidedata = False
        self._srdensitythreshold = 0.50
        self._srmingapsize = 262144

        # Make copy of flag processors so each revlog instance can support
        # custom flags.
        self._flagprocessors = dict(flagutil.flagprocessors)

        # 3-tuple of file handles being used for active writing.
        self._writinghandles = None
        # prevent nesting of addgroup
        self._adding_group = None

        self._loadindex()

        self._concurrencychecker = concurrencychecker

    def _init_opts(self):
        """process options (from above/config) to set up the associated
        default revlog mode

        These values might be affected when actually reading on-disk
        information.

        The relevant values are returned for use in _loadindex().

        * new_header:
            version header to use if we need to create a new revlog

        * mmapindexthreshold:
            minimal index size at which to start using mmap

        * force_nodemap:
            force the usage of a "development" version of the nodemap code
        """
        mmapindexthreshold = None
        opts = self.opener.options

        if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
            new_header = CHANGELOGV2
        elif b'revlogv2' in opts:
            new_header = REVLOGV2
        elif b'revlogv1' in opts:
            new_header = REVLOGV1 | FLAG_INLINE_DATA
            if b'generaldelta' in opts:
                new_header |= FLAG_GENERALDELTA
        elif b'revlogv0' in self.opener.options:
            new_header = REVLOGV0
        else:
            new_header = REVLOG_DEFAULT_VERSION

        if b'chunkcachesize' in opts:
            self._chunkcachesize = opts[b'chunkcachesize']
        if b'maxchainlen' in opts:
            self._maxchainlen = opts[b'maxchainlen']
        if b'deltabothparents' in opts:
            self._deltabothparents = opts[b'deltabothparents']
        self._lazydelta = bool(opts.get(b'lazydelta', True))
        self._lazydeltabase = False
        if self._lazydelta:
            self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
        if b'compengine' in opts:
            self._compengine = opts[b'compengine']
        if b'zlib.level' in opts:
            self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
        if b'zstd.level' in opts:
            self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
        if b'maxdeltachainspan' in opts:
            self._maxdeltachainspan = opts[b'maxdeltachainspan']
        if self._mmaplargeindex and b'mmapindexthreshold' in opts:
            mmapindexthreshold = opts[b'mmapindexthreshold']
        self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
        withsparseread = bool(opts.get(b'with-sparse-read', False))
        # sparse-revlog forces sparse-read
        self._withsparseread = self._sparserevlog or withsparseread
        if b'sparse-read-density-threshold' in opts:
            self._srdensitythreshold = opts[b'sparse-read-density-threshold']
        if b'sparse-read-min-gap-size' in opts:
            self._srmingapsize = opts[b'sparse-read-min-gap-size']
        if opts.get(b'enableellipsis'):
            self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

        # revlog v0 doesn't have flag processors
        for flag, processor in pycompat.iteritems(
            opts.get(b'flagprocessors', {})
        ):
            flagutil.insertflagprocessor(flag, processor, self._flagprocessors)

        if self._chunkcachesize <= 0:
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not greater than 0')
                % self._chunkcachesize
            )
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not a power of 2')
                % self._chunkcachesize
            )
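        # The bitwise test above uses the standard power-of-two identity:
        # for x > 0, x & (x - 1) == 0 iff x is a power of two. For example,
        # the default of 65536 (0x10000) passes, since 65536 & 65535 == 0,
        # while a value such as 65537 would be rejected
        # (65537 & 65536 == 65536).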
        force_nodemap = opts.get(b'devel-force-nodemap', False)
        return new_header, mmapindexthreshold, force_nodemap

    def _get_data(self, filepath, mmap_threshold, size=None):
        """return a file's content, with or without mmap

        If the file is missing, return the empty string"""
        try:
            with self.opener(filepath) as fp:
                if mmap_threshold is not None:
                    file_size = self.opener.fstat(fp).st_size
                    if file_size >= mmap_threshold:
                        if size is not None:
                            # avoid a potential mmap crash
                            size = min(file_size, size)
                        # TODO: should .close() to release resources without
                        # relying on Python GC
                        if size is None:
                            return util.buffer(util.mmapread(fp))
                        else:
                            return util.buffer(util.mmapread(fp, size))
                if size is None:
                    return fp.read()
                else:
                    return fp.read(size)
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            return b''

    def _loadindex(self):

        new_header, mmapindexthreshold, force_nodemap = self._init_opts()

        if self.postfix is not None:
            entry_point = b'%s.i.%s' % (self.radix, self.postfix)
        elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
            entry_point = b'%s.i.a' % self.radix
        else:
            entry_point = b'%s.i' % self.radix

        entry_data = b''
        self._initempty = True
        entry_data = self._get_data(entry_point, mmapindexthreshold)
        if len(entry_data) > 0:
            header = INDEX_HEADER.unpack(entry_data[:4])[0]
            self._initempty = False
        else:
            header = new_header

        self._format_flags = header & ~0xFFFF
        self._format_version = header & 0xFFFF
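        # For instance, the v1 header built above for the b'revlogv1' case,
        # REVLOGV1 | FLAG_INLINE_DATA (0x00010001), splits into
        # _format_flags == 0x00010000 (the inline flag) and
        # _format_version == 1.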

        supported_flags = SUPPORTED_FLAGS.get(self._format_version)
        if supported_flags is None:
            msg = _(b'unknown version (%d) in revlog %s')
            msg %= (self._format_version, self.display_id)
            raise error.RevlogError(msg)
        elif self._format_flags & ~supported_flags:
            msg = _(b'unknown flags (%#04x) in version %d revlog %s')
            display_flag = self._format_flags >> 16
            msg %= (display_flag, self._format_version, self.display_id)
            raise error.RevlogError(msg)

        features = FEATURES_BY_VERSION[self._format_version]
        self._inline = features[b'inline'](self._format_flags)
        self._generaldelta = features[b'generaldelta'](self._format_flags)
        self.hassidedata = features[b'sidedata']

        if not features[b'docket']:
            self._indexfile = entry_point
            index_data = entry_data
        else:
            self._docket_file = entry_point
            if self._initempty:
                self._docket = docketutil.default_docket(self, header)
            else:
                self._docket = docketutil.parse_docket(
                    self, entry_data, use_pending=self._trypending
                )
            self._indexfile = self._docket.index_filepath()
            index_data = b''
            index_size = self._docket.index_end
            if index_size > 0:
                index_data = self._get_data(
                    self._indexfile, mmapindexthreshold, size=index_size
                )
                if len(index_data) < index_size:
                    msg = _(b'too few index data for %s: got %d, expected %d')
                    msg %= (self.display_id, len(index_data), index_size)
                    raise error.RevlogError(msg)

            self._inline = False
            # generaldelta is implied by version 2 revlogs.
            self._generaldelta = True
            # the logic for persistent nodemap will be dealt with within the
            # main docket, so disable it for now.
            self._nodemap_file = None

        if self._docket is not None:
            self._datafile = self._docket.data_filepath()
            self._sidedatafile = self._docket.sidedata_filepath()
        elif self.postfix is None:
            self._datafile = b'%s.d' % self.radix
        else:
            self._datafile = b'%s.d.%s' % (self.radix, self.postfix)

        self.nodeconstants = sha1nodeconstants
        self.nullid = self.nodeconstants.nullid

        # sparse-revlog can't be on without general-delta (issue6056)
        if not self._generaldelta:
            self._sparserevlog = False

        self._storedeltachains = True

        devel_nodemap = (
            self._nodemap_file
            and force_nodemap
            and parse_index_v1_nodemap is not None
        )

        use_rust_index = False
        if rustrevlog is not None:
            if self._nodemap_file is not None:
                use_rust_index = True
            else:
                use_rust_index = self.opener.options.get(b'rust.index')

        self._parse_index = parse_index_v1
        if self._format_version == REVLOGV0:
            self._parse_index = revlogv0.parse_index_v0
        elif self._format_version == REVLOGV2:
            self._parse_index = parse_index_v2
        elif self._format_version == CHANGELOGV2:
            self._parse_index = parse_index_cl_v2
        elif devel_nodemap:
            self._parse_index = parse_index_v1_nodemap
        elif use_rust_index:
            self._parse_index = parse_index_v1_mixed
        try:
            d = self._parse_index(index_data, self._inline)
            index, _chunkcache = d
            use_nodemap = (
                not self._inline
                and self._nodemap_file is not None
                and util.safehasattr(index, 'update_nodemap_data')
            )
            if use_nodemap:
                nodemap_data = nodemaputil.persisted_data(self)
                if nodemap_data is not None:
                    docket = nodemap_data[0]
                    if (
                        len(d[0]) > docket.tip_rev
                        and d[0][docket.tip_rev][7] == docket.tip_node
                    ):
                        # no changelog tampering
                        self._nodemap_docket = docket
                        index.update_nodemap_data(*nodemap_data)
        except (ValueError, IndexError):
            raise error.RevlogError(
                _(b"index %s is corrupted") % self.display_id
            )
        self.index, self._chunkcache = d
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = util.lrucachedict(500)
        # revlog header -> revlog compressor
        self._decompressors = {}

    @util.propertycache
    def revlog_kind(self):
        return self.target[0]

    @util.propertycache
    def display_id(self):
        """The public facing "ID" of the revlog that we use in messages"""
        # Maybe we should build a user facing representation of
        # revlog.target instead of using `self.radix`
        return self.radix

    def _get_decompressor(self, t):
        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor(self._compengineopts)
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(
                    _(b'unknown compression type %s') % binascii.hexlify(t)
                )
        return compressor

    @util.propertycache
    def _compressor(self):
        engine = util.compengines[self._compengine]
        return engine.revlogcompressor(self._compengineopts)

    @util.propertycache
    def _decompressor(self):
        """the default decompressor"""
        if self._docket is None:
            return None
        t = self._docket.default_compression_header
        c = self._get_decompressor(t)
        return c.decompress

    def _indexfp(self):
        """file object for the revlog's index file"""
        return self.opener(self._indexfile, mode=b"r")

    def __index_write_fp(self):
        # You should not use this directly; use `_writing` instead
        try:
            f = self.opener(
                self._indexfile, mode=b"r+", checkambig=self._checkambig
            )
            if self._docket is None:
                f.seek(0, os.SEEK_END)
            else:
                f.seek(self._docket.index_end, os.SEEK_SET)
            return f
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            return self.opener(
                self._indexfile, mode=b"w+", checkambig=self._checkambig
            )

    def __index_new_fp(self):
        # You should not use this unless you are upgrading from an inline
        # revlog
        return self.opener(
            self._indexfile,
            mode=b"w",
            checkambig=self._checkambig,
            atomictemp=True,
        )

    def _datafp(self, mode=b'r'):
        """file object for the revlog's data file"""
        return self.opener(self._datafile, mode=mode)

    @contextlib.contextmanager
    def _datareadfp(self, existingfp=None):
        """file object suitable to read data"""
        # Use explicit file handle, if given.
        if existingfp is not None:
            yield existingfp

        # Use a file handle being actively used for writes, if available.
        # There is some danger to doing this because reads will seek the
        # file. However, _writeentry() performs a SEEK_END before all writes,
        # so we should be safe.
        elif self._writinghandles:
            if self._inline:
                yield self._writinghandles[0]
            else:
                yield self._writinghandles[1]

        # Otherwise open a new file handle.
        else:
            if self._inline:
                func = self._indexfp
            else:
                func = self._datafp
            with func() as fp:
                yield fp
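
    # A minimal usage sketch for the context manager above (illustrative
    # only; real callers live in the chunk-reading code of this module,
    # and inline revlogs need an extra index-entry offset):
    #
    #     with self._datareadfp() as fp:
    #         fp.seek(self.start(rev))
    #         chunk = fp.read(self.length(rev))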

    @contextlib.contextmanager
    def _sidedatareadfp(self):
        """file object suitable to read sidedata"""
        if self._writinghandles:
            yield self._writinghandles[2]
        else:
            with self.opener(self._sidedatafile) as fp:
                yield fp

    def tiprev(self):
        return len(self.index) - 1

    def tip(self):
        return self.node(self.tiprev())

    def __contains__(self, rev):
        return 0 <= rev < len(self)

    def __len__(self):
        return len(self.index)

    def __iter__(self):
        return iter(pycompat.xrange(len(self)))

    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        return storageutil.iterrevs(len(self), start=start, stop=stop)

    @property
    def nodemap(self):
        msg = (
            b"revlog.nodemap is deprecated, "
            b"use revlog.index.[has_node|rev|get_rev]"
        )
        util.nouideprecwarn(msg, b'5.3', stacklevel=2)
        return self.index.nodemap

    @property
    def _nodecache(self):
        msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
        util.nouideprecwarn(msg, b'5.3', stacklevel=2)
        return self.index.nodemap

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (e.g. LFS). This is because such a flag processor can
        # alter the rawtext content that the delta will be based on, and two
        # clients could have the same revlog node with different flags (i.e.
        # different rawtext contents) and the delta could be incompatible.
        if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
            self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
        ):
            return False
        return True

    def update_caches(self, transaction):
        if self._nodemap_file is not None:
            if transaction is None:
                nodemaputil.update_persistent_nodemap(self)
            else:
                nodemaputil.setup_persistent_nodemap(transaction, self)

    def clearcaches(self):
        self._revisioncache = None
        self._chainbasecache.clear()
        self._chunkcache = (0, b'')
        self._pcache = {}
        self._nodemap_docket = None
        self.index.clearcaches()
        # The python code is responsible for validating the docket, so we
        # end up having to refresh it here.
        use_nodemap = (
            not self._inline
            and self._nodemap_file is not None
            and util.safehasattr(self.index, 'update_nodemap_data')
        )
        if use_nodemap:
            nodemap_data = nodemaputil.persisted_data(self)
            if nodemap_data is not None:
                self._nodemap_docket = nodemap_data[0]
                self.index.update_nodemap_data(*nodemap_data)

    def rev(self, node):
        try:
            return self.index.rev(node)
        except TypeError:
            raise
        except error.RevlogError:
            # parsers.c radix tree lookup failed
            if (
                node == self.nodeconstants.wdirid
                or node in self.nodeconstants.wdirfilenodeids
            ):
                raise error.WdirUnsupported
            raise error.LookupError(node, self.display_id, _(b'no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)
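
    # For example (illustrative bit pattern): an index[rev][0] value of
    # 0x01230000 unpacks to start(rev) == 0x123 and flags(rev) == 0
    # (no flags set); the flags accessor below masks the same field
    # with 0xFFFF.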

    def sidedata_cut_off(self, rev):
        sd_cut_off = self.index[rev][8]
        if sd_cut_off != 0:
            return sd_cut_off
        # This is some annoying dance, because entries without sidedata
        # currently use 0 as their offset. (instead of previous-offset +
        # previous-size)
        #
        # We should reconsider this sidedata → 0 sidedata_offset policy.
        # In the meantime, we need this.
        while 0 <= rev:
            e = self.index[rev]
            if e[9] != 0:
                return e[8] + e[9]
            rev -= 1
        return 0
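
    # Example of the fallback walk above (illustrative values): if the
    # entry for rev records (sidedata_offset=0, sidedata_size=0) and the
    # closest earlier revision with sidedata recorded
    # (sidedata_offset=1024, sidedata_size=128), the cut-off returned is
    # 1024 + 128 == 1152, i.e. the end of the last sidedata actually
    # written.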

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]

    def sidedata_length(self, rev):
        if not self.hassidedata:
            return 0
        return self.index[rev][9]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.rawdata(rev)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)
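        # (The XOR above clears the ELLIPSIS bit from the known-flags mask,
        # so the fast path triggers iff the only known flag set, if any,
        # is ELLIPSIS.)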

        return len(self.revision(rev, raw=False))

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise
        if entry[5] == nullrev:
            return entry[6], entry[5]
        else:
            return entry[5], entry[6]
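
    # E.g. a root revision stored as (nullrev, nullrev) is returned
    # unchanged, while an entry stored as (nullrev, p2) is returned as
    # (p2, nullrev): the swap above guarantees a null parent never comes
    # first when the other parent is set.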
939
939
940 # fast parentrevs(rev) where rev isn't filtered
940 # fast parentrevs(rev) where rev isn't filtered
941 _uncheckedparentrevs = parentrevs
941 _uncheckedparentrevs = parentrevs
942
942
943 def node(self, rev):
943 def node(self, rev):
944 try:
944 try:
945 return self.index[rev][7]
945 return self.index[rev][7]
946 except IndexError:
946 except IndexError:
947 if rev == wdirrev:
947 if rev == wdirrev:
948 raise error.WdirUnsupported
948 raise error.WdirUnsupported
949 raise
949 raise
950
950
951 # Derived from index values.
951 # Derived from index values.
952
952
953 def end(self, rev):
953 def end(self, rev):
954 return self.start(rev) + self.length(rev)
954 return self.start(rev) + self.length(rev)
955
955
956 def parents(self, node):
956 def parents(self, node):
957 i = self.index
957 i = self.index
958 d = i[self.rev(node)]
958 d = i[self.rev(node)]
959 # inline node() to avoid function call overhead
959 # inline node() to avoid function call overhead
960 if d[5] == self.nullid:
960 if d[5] == self.nullid:
961 return i[d[6]][7], i[d[5]][7]
961 return i[d[6]][7], i[d[5]][7]
962 else:
962 else:
963 return i[d[5]][7], i[d[6]][7]
963 return i[d[5]][7], i[d[6]][7]
964
964
965 def chainlen(self, rev):
965 def chainlen(self, rev):
966 return self._chaininfo(rev)[0]
966 return self._chaininfo(rev)[0]
967
967
968 def _chaininfo(self, rev):
968 def _chaininfo(self, rev):
969 chaininfocache = self._chaininfocache
969 chaininfocache = self._chaininfocache
970 if rev in chaininfocache:
970 if rev in chaininfocache:
971 return chaininfocache[rev]
971 return chaininfocache[rev]
972 index = self.index
972 index = self.index
973 generaldelta = self._generaldelta
973 generaldelta = self._generaldelta
974 iterrev = rev
974 iterrev = rev
975 e = index[iterrev]
975 e = index[iterrev]
976 clen = 0
976 clen = 0
977 compresseddeltalen = 0
977 compresseddeltalen = 0
978 while iterrev != e[3]:
978 while iterrev != e[3]:
979 clen += 1
979 clen += 1
980 compresseddeltalen += e[1]
980 compresseddeltalen += e[1]
981 if generaldelta:
981 if generaldelta:
982 iterrev = e[3]
982 iterrev = e[3]
983 else:
983 else:
984 iterrev -= 1
984 iterrev -= 1
985 if iterrev in chaininfocache:
985 if iterrev in chaininfocache:
986 t = chaininfocache[iterrev]
986 t = chaininfocache[iterrev]
987 clen += t[0]
987 clen += t[0]
988 compresseddeltalen += t[1]
988 compresseddeltalen += t[1]
989 break
989 break
990 e = index[iterrev]
990 e = index[iterrev]
991 else:
991 else:
992 # Add text length of base since decompressing that also takes
992 # Add text length of base since decompressing that also takes
993 # work. For cache hits the length is already included.
993 # work. For cache hits the length is already included.
994 compresseddeltalen += e[1]
994 compresseddeltalen += e[1]
995 r = (clen, compresseddeltalen)
995 r = (clen, compresseddeltalen)
996 chaininfocache[rev] = r
996 chaininfocache[rev] = r
997 return r
997 return r
998
998
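    # Hedged illustration (not revlog API) of the chain accounting above, on
    # a toy index where toy_index[rev] is (compressed_length, delta_base)
    # and a base equal to the rev itself marks a full snapshot.
    def _example_chaininfo(toy_index, rev):
        clen = 0
        compressed = 0
        length, base = toy_index[rev]
        while rev != base:
            clen += 1
            compressed += length
            rev = base  # general-delta style: follow the stored base
            length, base = toy_index[rev]
        # count the base text too, since decompressing it is also work
        compressed += length
        return clen, compressed

    # With toy_index = [(5, 0), (3, 0), (2, 1)], rev 2 deltas against rev 1,
    # which deltas against the snapshot rev 0, so
    # _example_chaininfo(toy_index, 2) == (2, 10).
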
    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped

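    # Hedged illustration (not revlog API): the same walk as the pure-Python
    # fallback above, over a toy general-delta index where toy_bases[rev] is
    # rev's delta base and a base equal to the rev itself marks a snapshot.
    def _example_deltachain_walk(toy_bases, rev, stoprev=None):
        chain = []
        base = toy_bases[rev]
        while rev != base and rev != stoprev:
            chain.append(rev)
            rev = base
            base = toy_bases[rev]
        stopped = rev == stoprev
        if not stopped:
            chain.append(rev)
        chain.reverse()
        return chain, stopped

    # With toy_bases = {0: 0, 1: 0, 2: 1, 3: 2}:
    #   _example_deltachain_walk(toy_bases, 3) -> ([0, 1, 2, 3], False)
    #   _example_deltachain_walk(toy_bases, 3, stoprev=1) -> ([2, 3], True)
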
    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse revision order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        # first, make sure start revisions aren't filtered
        revs = list(revs)
        checkrev = self.node
        for r in revs:
            checkrev(r)
        # and we're sure ancestors aren't filtered as well

        if rustancestor is not None and self.index.rust_ext_compat:
            lazyancestors = rustancestor.LazyAncestors
            arg = self.index
        else:
            lazyancestors = ancestor.lazyancestors
            arg = self._uncheckedparentrevs
        return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)

    def descendants(self, revs):
        return dagop.descendantrevs(revs, self.revs, self.parentrevs)

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

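    # Hedged sketch (not the revlog implementation, and without the lazyset
    # machinery above) of the same computation on a toy DAG given as a
    # parent map; nullrev parents are simply omitted.
    def _example_findmissing(parents, common, heads):
        # every ancestor of `common`, inclusive: ::common
        has = set()
        stack = list(common)
        while stack:
            r = stack.pop()
            if r not in has:
                has.add(r)
                stack.extend(parents.get(r, ()))
        # ancestors of `heads` not already in `has`: (::heads) - (::common)
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r not in missing:
                missing.add(r)
                visit.extend(p for p in parents.get(r, ()) if p not in has)
        return sorted(missing)

    # With parents = {1: [0], 2: [1], 3: [1]}, common=[2], heads=[3], this
    # returns [3]: rev 3 is the only ancestor of the heads that is not
    # already an ancestor of common.
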
    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        if rustancestor is not None and self.index.rust_ext_compat:
            return rustancestor.MissingAncestors(self.index, common)
        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [self.nullid]  # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return (
                [self.node(r) for r in self],
                [self.nullid],
                list(self.heads()),
            )
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == self.nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n)  # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update(
                            [p for p in self.parents(n) if p != self.nullid]
                        )
                    elif n in heads:  # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [self.nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the dependents list with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in pycompat.iteritems(heads) if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

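    # Hedged sketch (not the revlog implementation) of what nodesbetween()
    # computes, on a toy DAG of revision numbers with a parent map; this
    # ignores nullid handling, filtering, and the fake-head pruning above.
    def _example_nodesbetween(parents, roots, heads):
        # ancestors of heads, inclusive
        anc = set()
        stack = list(heads)
        while stack:
            r = stack.pop()
            if r not in anc:
                anc.add(r)
                stack.extend(parents.get(r, ()))
        # propagate descendant-ness from the roots in topological order and
        # keep the nodes that are also ancestors of the heads
        out = []
        desc = set(roots)
        for r in sorted(anc | desc):
            if r in desc or any(p in desc for p in parents.get(r, ())):
                desc.add(r)
                if r in anc:
                    out.append(r)
        return out

    # With parents = {1: [0], 2: [1], 3: [2]}, roots=[1], heads=[2], the
    # result is [1, 2]: the topological path from root 1 to head 2.
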
    def headrevs(self, revs=None):
        if revs is None:
            try:
                return self.index.headrevs()
            except AttributeError:
                return self._headrevs()
        if rustdagop is not None and self.index.rust_ext_compat:
            return rustdagop.headrevs(self.index, revs)
        return dagop.headrevs(revs, self._uncheckedparentrevs)

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]

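    # Hedged illustration of the head scan above: one pass in revision
    # order marks every revision as a candidate head and clears its parents.
    # The extra trailing slot absorbs writes for nullrev (-1) parents, just
    # like the `count + 1` sizing in _headrevs().
    def _example_headrevs(parentrevs_list):
        count = len(parentrevs_list)
        ishead = [0] * (count + 1)
        for r, (p1, p2) in enumerate(parentrevs_list):
            ishead[r] = 1  # r may be a head
            ishead[p1] = ishead[p2] = 0  # its parents are not
        return [r for r, val in enumerate(ishead[:count]) if val]

    # _example_headrevs([(-1, -1), (0, -1), (0, -1)]) == [1, 2]: revisions
    # 1 and 2 both have parent 0 and no children of their own.
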
    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [self.nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = {self.rev(n) for n in stop or []}

        revs = dagop.headrevssubset(
            self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
        )

        return [self.node(rev) for rev in revs]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError):  # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        reachableroots is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return bool(self.reachableroots(a, [b], [a], includepath=False))

    def reachableroots(self, minroot, heads, roots, includepath=False):
        """return (heads(::(<roots> and <roots>::<heads>)))

        If includepath is True, return (<roots>::<heads>)."""
        try:
            return self.index.reachableroots2(
                minroot, heads, roots, includepath
            )
        except AttributeError:
            return dagop._reachablerootspure(
                self.parentrevs, minroot, roots, heads, includepath
            )

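    # Hedged toy version (not revlog API) of the ancestry test behind
    # isancestorrev(): walk b's ancestry, never descending below a, and see
    # whether a is reached. `parentrevs` is any callable with the same
    # contract as self.parentrevs.
    def _example_isancestorrev(parentrevs, a, b):
        if a == b:
            return True
        if a > b:
            return False  # an ancestor always has a smaller revision number
        stack = [b]
        seen = set()
        while stack:
            r = stack.pop()
            if r == a:
                return True
            if r > a and r not in seen:  # revs below `a` cannot reach it
                seen.add(r)
                stack.extend(parentrevs(r))
        return False
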
    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return self.nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == self.nodeconstants.nodelen:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node)  # quick search the index
                return node
            except error.LookupError:
                pass  # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if b"%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 2 * self.nodeconstants.nodelen:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, error.LookupError):
                pass

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be full
        # hashes
        maybewdir = self.nodeconstants.wdirhex.startswith(id)
        ambiguous = False
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    ambiguous = True
                else:
                    return partial
            elif maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            else:
                return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                ambiguous = True
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass
        if ambiguous:
            raise error.AmbiguousPrefixLookupError(
                id, self.display_id, _(b'ambiguous identifier')
            )

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2  # grab an even number of digits
                prefix = bin(id[: l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [
                    n for n in nl if hex(n).startswith(id) and self.hasnode(n)
                ]
                if self.nodeconstants.nullhex.startswith(id):
                    nl.append(self.nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.display_id, _(b'ambiguous identifier')
                    )
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
        - revision number or str(revision number)
        - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.display_id, _(b'no match found'))

    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""

        def isvalid(prefix):
            try:
                matchednode = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if matchednode is None:
                raise error.LookupError(node, self.display_id, _(b'no node'))
            return True

        def maybewdir(prefix):
            return all(c == b'f' for c in pycompat.iterbytestr(prefix))

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != self.nodeconstants.wdirid:
                    raise error.LookupError(
                        node, self.display_id, _(b'no node')
                    )
            except AttributeError:
                # Fall through to pure code
                pass

        if node == self.nodeconstants.wdirid:
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, len(hexnode) + 1):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

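    # Hedged illustration of the search shortest() performs: grow the prefix
    # until exactly one stored node matches it. A toy version over plain hex
    # strings, with no wdir or filtering concerns:
    def _example_shortest(hexnodes, hexnode, minlength=1):
        for length in range(minlength, len(hexnode) + 1):
            prefix = hexnode[:length]
            matches = [n for n in hexnodes if n.startswith(prefix)]
            if matches == [hexnode]:
                return prefix
        return hexnode

    # _example_shortest(["abc123", "abd456"], "abc123") returns "abc":
    # "a" and "ab" are ambiguous between the two nodes.
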
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.

        Raises if the requested number of bytes could not be read.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (
            (offset + length + cachesize) & ~(cachesize - 1)
        ) - realoffset
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)

        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            startoffset = offset - realoffset
            if len(d) - startoffset < length:
                filename = self._indexfile if self._inline else self._datafile
                got = len(d) - startoffset
                m = PARTIAL_READ_MSG % (filename, length, offset, got)
                raise error.RevlogError(m)
            return util.buffer(d, startoffset, length)

        if len(d) < length:
            filename = self._indexfile if self._inline else self._datafile
            # here the read started exactly at `offset` (startoffset is only
            # defined in the aligned-read branch above), so the shortfall is
            # simply the number of bytes actually returned
            got = len(d)
            m = PARTIAL_READ_MSG % (filename, length, offset, got)
            raise error.RevlogError(m)

        return d

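    # Hedged illustration of the window arithmetic above: with a
    # power-of-two cache size, masking with ~(cachesize - 1) rounds the
    # start down to a boundary and the end up past the request, so reads
    # are aligned and include some read-ahead.
    def _example_read_window(offset, length, cachesize=65536):
        realoffset = offset & ~(cachesize - 1)
        reallength = (
            (offset + length + cachesize) & ~(cachesize - 1)
        ) - realoffset
        return realoffset, reallength

    # _example_read_window(70000, 100) == (65536, 65536): the 100 requested
    # bytes are served from a single aligned 64KiB window.
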
    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d  # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self.index.entry_size
            end += (endrev + 1) * self.index.entry_size
        length = end - start

        return start, self._getsegment(start, length, df=df)

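    # Hedged illustration of the inline adjustment above: in an inline
    # revlog the data for each revision sits after (rev + 1) index entries,
    # so a logical data offset must be shifted by that many entry_size
    # bytes. 64 is the v1 index entry size; the numbers are illustrative.
    def _example_inline_offset(rev, logical_start, entry_size=64):
        return logical_start + (rev + 1) * entry_size

    # Revision 0 with logical offset 0 actually starts at byte 64 of an
    # inline revlog file, right after its own index entry.
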
1760 def _chunk(self, rev, df=None):
1760 def _chunk(self, rev, df=None):
1761 """Obtain a single decompressed chunk for a revision.
1761 """Obtain a single decompressed chunk for a revision.
1762
1762
1763 Accepts an integer revision and an optional already-open file handle
1763 Accepts an integer revision and an optional already-open file handle
1764 to be used for reading. If used, the seek position of the file will not
1764 to be used for reading. If used, the seek position of the file will not
1765 be preserved.
1765 be preserved.
1766
1766
1767 Returns a str holding uncompressed data for the requested revision.
1767 Returns a str holding uncompressed data for the requested revision.
1768 """
1768 """
1769 compression_mode = self.index[rev][10]
1769 compression_mode = self.index[rev][10]
1770 data = self._getsegmentforrevs(rev, rev, df=df)[1]
1770 data = self._getsegmentforrevs(rev, rev, df=df)[1]
1771 if compression_mode == COMP_MODE_PLAIN:
1771 if compression_mode == COMP_MODE_PLAIN:
1772 return data
1772 return data
1773 elif compression_mode == COMP_MODE_DEFAULT:
1773 elif compression_mode == COMP_MODE_DEFAULT:
1774 return self._decompressor(data)
1774 return self._decompressor(data)
1775 elif compression_mode == COMP_MODE_INLINE:
1775 elif compression_mode == COMP_MODE_INLINE:
1776 return self.decompress(data)
1776 return self.decompress(data)
1777 else:
1777 else:
1778 msg = 'unknown compression mode %d'
1778 msg = 'unknown compression mode %d'
1779 msg %= compression_mode
1779 msg %= compression_mode
1780 raise error.RevlogError(msg)
1780 raise error.RevlogError(msg)
1781
1781
    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self.index.entry_size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(
                self, revs, targetsize=targetsize
            )

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            # self._decompressor might be None, but will not be used in that case
            def_decomp = self._decompressor
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                comp_mode = self.index[rev][10]
                c = buffer(data, chunkstart - offset, chunklength)
                if comp_mode == COMP_MODE_PLAIN:
                    ladd(c)
                elif comp_mode == COMP_MODE_INLINE:
                    ladd(decomp(c))
                elif comp_mode == COMP_MODE_DEFAULT:
                    ladd(def_decomp(c))
                else:
                    msg = 'unknown compression mode %d'
                    msg %= comp_mode
                    raise error.RevlogError(msg)

        return l

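    # Illustrative sketch (assumption, not part of the original code): the
    # speed-up over repeated ``_chunk()`` calls comes from issuing one read
    # per slice and carving per-revision views out of it, roughly:
    #
    #   segment_start, segment = read(start(first), end(last))  # one I/O
    #   for rev in revs:
    #       view = util.buffer(segment, start(rev) - segment_start, length(rev))
    #
    # ``read`` here is a made-up stand-in for ``_getsegmentforrevs``.
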
    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, b'')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

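    # Illustrative note (not part of the original code): without general
    # delta, a delta is always stored against the previous revision, so the
    # base column only marks chain starts; with general delta, the base
    # column names the actual delta parent. Hypothetical index contents:
    #
    #   rev=5, base=5                   -> full snapshot, deltaparent == nullrev
    #   rev=6, base=2, generaldelta on  -> deltaparent == 2
    #   rev=6, base=2, generaldelta off -> deltaparent == 5 (rev - 1)
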
    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        if not self._sparserevlog:
            return self.deltaparent(rev) == nullrev
        elif util.safehasattr(self.index, b'issnapshot'):
            # directly assign the method to cache the testing and access
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        p2 = entry[6]
        if base == p1 or base == p2:
            return False
        return self.issnapshot(base)

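    # Illustrative note (not part of the original code): in a sparse revlog,
    # a delta taken against one of the revision's own parents is an ordinary
    # delta, while a delta taken against another snapshot forms an
    # intermediate snapshot; hence the parent check followed by the
    # recursion on ``base`` above.
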
    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))

    def _processflags(self, text, flags, operation, raw=False):
        """deprecated entry point to access flag processors"""
        msg = b'_processflags(...) use the specialized variant'
        util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        if raw:
            return text, flagutil.processflagsraw(self, text, flags)
        elif operation == b'read':
            return flagutil.processflagsread(self, text, flags)
        else:  # write operation
            return flagutil.processflagswrite(self, text, flags)

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if raw:
            msg = (
                b'revlog.revision(..., raw=True) is deprecated, '
                b'use revlog.rawdata(...)'
            )
            util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        return self._revisiondata(nodeorrev, _df, raw=raw)

    def sidedata(self, nodeorrev, _df=None):
        """a map of extra data related to the changeset but not part of the hash

        This function currently returns a dictionary. However, a more advanced
        mapping object will likely be used in the future to make the code more
        efficient and lazy.
        """
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
        else:
            rev = self.rev(nodeorrev)
        return self._sidedata(rev)

    def _revisiondata(self, nodeorrev, _df=None, raw=False):
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        # fast path the special `nullid` rev
        if node == self.nullid:
            return b""

        # ``rawtext`` is the text as stored inside the revlog. Might be the
        # revision or might need to be processed to retrieve the revision.
        rev, rawtext, validated = self._rawtext(node, rev, _df=_df)

        if raw and validated:
            # if we don't want to process the raw text and the raw
            # text is cached, we can exit early.
            return rawtext
        if rev is None:
            rev = self.rev(node)
        # the revlog's flags for this revision
        # (usually alter its state or content)
        flags = self.flags(rev)

        if validated and flags == REVIDX_DEFAULT_FLAGS:
            # no extra flags set, no flag processor runs, text = rawtext
            return rawtext

        if raw:
            validatehash = flagutil.processflagsraw(self, rawtext, flags)
            text = rawtext
        else:
            r = flagutil.processflagsread(self, rawtext, flags)
            text, validatehash = r
        if validatehash:
            self.checkhash(text, node, rev=rev)
        if not validated:
            self._revisioncache = (node, rev, rawtext)

        return text

    def _rawtext(self, node, rev, _df=None):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)
        """

        # revision in the cache (could be useful to apply delta)
        cachedrev = None
        # An intermediate text to apply deltas to
        basetext = None

        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._revisioncache:
            if self._revisioncache[0] == node:
                return (rev, self._revisioncache[2], True)
            cachedrev = self._revisioncache[1]

        if rev is None:
            rev = self.rev(node)

        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            basetext = self._revisioncache[2]

        # drop cache to save memory, the caller is expected to
        # update self._revisioncache after validating the text
        self._revisioncache = None

        targetsize = None
        rawsize = self.index[rev][2]
        if 0 <= rawsize:
            targetsize = 4 * rawsize

        bins = self._chunks(chain, df=_df, targetsize=targetsize)
        if basetext is None:
            basetext = bytes(bins[0])
            bins = bins[1:]

        rawtext = mdiff.patches(basetext, bins)
        del basetext  # let us have a chance to free memory early
        return (rev, rawtext, False)

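    # Illustrative sketch (assumption, not part of the original code):
    # reconstruction folds the delta chain left to right, starting from the
    # full text at the base of the chain:
    #
    #   chunks = [chunk(r) for r in chain]   # chain is base-first
    #   text = chunks[0]                     # full snapshot
    #   for delta in chunks[1:]:
    #       text = apply_patch(text, delta)  # mdiff.patches does this in bulk
    #
    # ``chunk`` and ``apply_patch`` are stand-in names for illustration; the
    # real code uses ``_chunks`` and ``mdiff.patches``.
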
    def _sidedata(self, rev):
        """Return the sidedata for a given revision number."""
        index_entry = self.index[rev]
        sidedata_offset = index_entry[8]
        sidedata_size = index_entry[9]

        if self._inline:
            sidedata_offset += self.index.entry_size * (1 + rev)
        if sidedata_size == 0:
            return {}

        # XXX this needs caching, as we do for data
        with self._sidedatareadfp() as sdf:
            if self._docket.sidedata_end < sidedata_offset + sidedata_size:
                filename = self._sidedatafile
                end = self._docket.sidedata_end
                offset = sidedata_offset
                length = sidedata_size
                m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
                raise error.RevlogError(m)

            sdf.seek(sidedata_offset, os.SEEK_SET)
            comp_segment = sdf.read(sidedata_size)

            if len(comp_segment) < sidedata_size:
                filename = self._sidedatafile
                length = sidedata_size
                offset = sidedata_offset
                got = len(comp_segment)
                m = PARTIAL_READ_MSG % (filename, length, offset, got)
                raise error.RevlogError(m)

        comp = self.index[rev][11]
        if comp == COMP_MODE_PLAIN:
            segment = comp_segment
        elif comp == COMP_MODE_DEFAULT:
            segment = self._decompressor(comp_segment)
        elif comp == COMP_MODE_INLINE:
            segment = self.decompress(comp_segment)
        else:
            msg = 'unknown compression mode %d'
            msg %= comp
            raise error.RevlogError(msg)

        sidedata = sidedatautil.deserialize_sidedata(segment)
        return sidedata

    def rawdata(self, nodeorrev, _df=None):
        """return the uncompressed raw data of a given node or revision number.

        _df - an existing file handle to read from. (internal-only)
        """
        return self._revisiondata(nodeorrev, _df, raw=True)

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if self._revisioncache and self._revisioncache[0] == node:
                    self._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(
                    _(b"integrity check failed on %s:%s")
                    % (self.display_id, pycompat.bytestr(revornode))
                )
        except error.RevlogError:
            if self._censorable and storageutil.iscensoredtext(text):
                raise error.CensoredNodeError(self.display_id, node, text)
            raise

    def _enforceinlinesize(self, tr):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        total_size = self.start(tiprev) + self.length(tiprev)
        if not self._inline or total_size < _maxinline:
            return

        troffset = tr.findoffset(self._indexfile)
        if troffset is None:
            raise error.RevlogError(
                _(b"%s not found in the transaction") % self._indexfile
            )
        trindex = 0
        tr.add(self._datafile, 0)

        existing_handles = False
        if self._writinghandles is not None:
            existing_handles = True
            fp = self._writinghandles[0]
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None

        new_dfh = self._datafp(b'w+')
        new_dfh.truncate(0)  # drop any potentially existing data
        try:
            with self._indexfp() as read_ifh:
                for r in self:
                    new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
                    if troffset <= self.start(r) + r * self.index.entry_size:
                        trindex = r
                new_dfh.flush()

            with self.__index_new_fp() as fp:
                self._format_flags &= ~FLAG_INLINE_DATA
                self._inline = False
                for i in self:
                    e = self.index.entry_binary(i)
                    if i == 0 and self._docket is None:
                        header = self._format_flags | self._format_version
                        header = self.index.pack_header(header)
                        e = header + e
                    fp.write(e)
                if self._docket is not None:
                    self._docket.index_end = fp.tell()

                # There is a small transactional race here. If the rename of
                # the index fails, we should remove the datafile. It is more
                # important to ensure that the data file is not truncated
                # when the index is replaced as otherwise data is lost.
                tr.replace(self._datafile, self.start(trindex))

                # the temp file replaces the real index when we exit the
                # context manager

                tr.replace(self._indexfile, trindex * self.index.entry_size)
                nodemaputil.setup_persistent_nodemap(tr, self)
                self._chunkclear()

            if existing_handles:
                # switched from inline to conventional; reopen the index
                ifh = self.__index_write_fp()
                self._writinghandles = (ifh, new_dfh, None)
                new_dfh = None
        finally:
            if new_dfh is not None:
                new_dfh.close()

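    # Illustrative note (not part of the original code): an inline revlog
    # interleaves each index entry with its data chunk in the ``.i`` file,
    # which is why offsets above are shifted by ``(rev + 1) * entry_size``.
    # The conversion performed here rewrites that single file into the
    # conventional split layout:
    #
    #   inline:  .i = [entry 0][data 0][entry 1][data 1]...
    #   split:   .i = [entry 0][entry 1]...    .d = [data 0][data 1]...
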
    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored."""

    @contextlib.contextmanager
    def _writing(self, transaction):
        if self._trypending:
            msg = b'try to write in a `trypending` revlog: %s'
            msg %= self.display_id
            raise error.ProgrammingError(msg)
        if self._writinghandles is not None:
            yield
        else:
            ifh = dfh = sdfh = None
            try:
                r = len(self)
                # opening the data file.
                dsize = 0
                if r:
                    dsize = self.end(r - 1)
                dfh = None
                if not self._inline:
                    try:
                        dfh = self._datafp(b"r+")
                        if self._docket is None:
                            dfh.seek(0, os.SEEK_END)
                        else:
                            dfh.seek(self._docket.data_end, os.SEEK_SET)
                    except IOError as inst:
                        if inst.errno != errno.ENOENT:
                            raise
                        dfh = self._datafp(b"w+")
                    transaction.add(self._datafile, dsize)
                if self._sidedatafile is not None:
                    try:
                        sdfh = self.opener(self._sidedatafile, mode=b"r+")
                        sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)
                    except IOError as inst:
                        if inst.errno != errno.ENOENT:
                            raise
                        sdfh = self.opener(self._sidedatafile, mode=b"w+")
                    transaction.add(
                        self._sidedatafile, self._docket.sidedata_end
                    )

                # opening the index file.
                isize = r * self.index.entry_size
                ifh = self.__index_write_fp()
                if self._inline:
                    transaction.add(self._indexfile, dsize + isize)
                else:
                    transaction.add(self._indexfile, isize)
                # exposing all file handles for writing.
                self._writinghandles = (ifh, dfh, sdfh)
                yield
                if self._docket is not None:
                    self._write_docket(transaction)
            finally:
                self._writinghandles = None
                if dfh is not None:
                    dfh.close()
                if sdfh is not None:
                    sdfh.close()
                # closing the index file last to avoid exposing references to
                # potentially unflushed data content.
                if ifh is not None:
                    ifh.close()

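    # Illustrative sketch (assumption, not part of the original code): all
    # mutation goes through this context manager, so callers look roughly
    # like:
    #
    #   with rl._writing(transaction):
    #       rl._addrevision(node, rawtext, transaction, link, p1, p2, ...)
    #
    # where ``rl`` is a revlog instance; nesting is cheap because an already
    # open set of handles short-circuits to a bare ``yield``.
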
    def _write_docket(self, transaction):
        """write the current docket on disk

        Exists as a method to help the changelog implement its transaction
        logic.

        We could also imagine using the same transaction logic for all
        revlogs, since dockets are cheap."""
        self._docket.write(transaction)

    def addrevision(
        self,
        text,
        transaction,
        link,
        p1,
        p2,
        cachedelta=None,
        node=None,
        flags=REVIDX_DEFAULT_FLAGS,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
        computed by default as hash(text, p1, p2), however subclasses might
        use a different hashing method (and override checkhash() in such a case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
        multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(
                _(b"attempted to add linkrev -1 to %s") % self.display_id
            )

        if sidedata is None:
            sidedata = {}
        elif sidedata and not self.hassidedata:
            raise error.ProgrammingError(
                _(b"trying to add sidedata to a revlog that does not support them")
            )

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = flagutil.processflagswrite(self, text, flags)

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _(
                    b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
                )
                % (self.display_id, len(rawtext))
            )

        node = node or self.hash(rawtext, p1, p2)
        rev = self.index.get_rev(node)
        if rev is not None:
            return rev

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(
            rawtext,
            transaction,
            link,
            p1,
            p2,
            node,
            flags,
            cachedelta=cachedelta,
            deltacomputer=deltacomputer,
            sidedata=sidedata,
        )

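    # Illustrative sketch (assumption, not part of the original code): a
    # typical caller adds a revision inside an open transaction, letting the
    # node default to hash(text, p1, p2):
    #
    #   rev = rl.addrevision(text, tr, linkrev, p1node, p2node)
    #   node = rl.node(rev)
    #
    # ``rl`` and ``tr`` are stand-ins for a revlog instance and an open
    # transaction.
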
    def addrawrevision(
        self,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        node,
        flags,
        cachedelta=None,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a raw revision with known flags, node and parents

        useful when reusing a revision not stored in this revlog (e.g.
        received over the wire, or read from an external bundle).
        """
        with self._writing(transaction):
            return self._addrevision(
                node,
                rawtext,
                transaction,
                link,
                p1,
                p2,
                flags,
                cachedelta,
                deltacomputer=deltacomputer,
                sidedata=sidedata,
            )

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return b'', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return b'', compressed

        if data[0:1] == b'\0':
            return b'', data
        return b'u', data

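    # Illustrative note (not part of the original code): ``compress`` returns
    # a (header, data) pair whose header disambiguates the stored bytes:
    #
    #   (b'', compressed)  -> engine output, which carries its own header
    #   (b'', data)        -> raw data already starting with b'\0'
    #   (b'u', data)       -> raw data that needs an explicit "uncompressed"
    #                         marker so it cannot be mistaken for engine output
    #
    # ``decompress`` below dispatches on that same leading byte.
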
    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == b'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(
                    _(b'revlog decompress error: %s')
                    % stringutil.forcebytestr(e)
                )
        # '\0' is more common than 'u' so it goes first.
        elif t == b'\0':
            return data
        elif t == b'u':
            return util.buffer(data, 1)

        compressor = self._get_decompressor(t)

        return compressor.decompress(data)

    def _addrevision(
        self,
        node,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        flags,
        cachedelta,
        alwayscache=False,
        deltacomputer=None,
        sidedata=None,
    ):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == self.nullid:
            raise error.RevlogError(
                _(b"%s: attempt to add null revision") % self.display_id
            )
        if (
            node == self.nodeconstants.wdirid
            or node in self.nodeconstants.wdirfilenodeids
        ):
            raise error.RevlogError(
                _(b"%s: attempt to add wdir revision") % self.display_id
            )
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)

        if self._inline:
            fh = self._writinghandles[0]
        else:
            fh = self._writinghandles[1]

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1

        offset = self._get_data_offset(prev)

        if self._concurrencychecker:
            ifh, dfh, sdfh = self._writinghandles
            # XXX no checking for the sidedata file
            if self._inline:
                # offset is "as if" it were in the .d file, so we need to add
                # on the size of the entry metadata.
                self._concurrencychecker(
                    ifh, self._indexfile, offset + curr * self.index.entry_size
                )
            else:
                # Entries in the .i are a consistent size.
                self._concurrencychecker(
                    ifh, self._indexfile, curr * self.index.entry_size
                )
                self._concurrencychecker(dfh, self._datafile, offset)

        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(
                revlog.size(self, cachedelta[0]), cachedelta[1]
            )
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = deltautil.deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        compression_mode = COMP_MODE_INLINE
        if self._docket is not None:
            h, d = deltainfo.data
            if not h and not d:
                # no data to store at all... declare it uncompressed
                compression_mode = COMP_MODE_PLAIN
            elif not h:
                t = d[0:1]
                if t == b'\0':
                    compression_mode = COMP_MODE_PLAIN
                elif t == self._docket.default_compression_header:
                    compression_mode = COMP_MODE_DEFAULT
            elif h == b'u':
                # we have a more efficient way to declare uncompressed
                h = b''
                compression_mode = COMP_MODE_PLAIN
                deltainfo = deltautil.drop_u_compression(deltainfo)

        sidedata_compression_mode = COMP_MODE_INLINE
        if sidedata and self.hassidedata:
            sidedata_compression_mode = COMP_MODE_PLAIN
            serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
            sidedata_offset = self._docket.sidedata_end
            h, comp_sidedata = self.compress(serialized_sidedata)
            if (
                h != b'u'
                and comp_sidedata[0:1] != b'\0'
                and len(comp_sidedata) < len(serialized_sidedata)
            ):
                assert not h
                if (
                    comp_sidedata[0:1]
                    == self._docket.default_compression_header
                ):
                    sidedata_compression_mode = COMP_MODE_DEFAULT
                    serialized_sidedata = comp_sidedata
                else:
                    sidedata_compression_mode = COMP_MODE_INLINE
                    serialized_sidedata = comp_sidedata
        else:
            serialized_sidedata = b""
            # Don't store the offset if the sidedata is empty, that way
            # we can easily detect empty sidedata and they will be no different
            # than ones we manually add.
            sidedata_offset = 0

        e = revlogutils.entry(
            flags=flags,
            data_offset=offset,
            data_compressed_length=deltainfo.deltalen,
            data_uncompressed_length=textlen,
            data_compression_mode=compression_mode,
            data_delta_base=deltainfo.base,
            link_rev=link,
            parent_rev_1=p1r,
            parent_rev_2=p2r,
            node_id=node,
            sidedata_offset=sidedata_offset,
            sidedata_compressed_length=len(serialized_sidedata),
            sidedata_compression_mode=sidedata_compression_mode,
        )

        self.index.append(e)
        entry = self.index.entry_binary(curr)
        if curr == 0 and self._docket is None:
            header = self._format_flags | self._format_version
            header = self.index.pack_header(header)
            entry = header + entry
        self._writeentry(
            transaction,
            entry,
            deltainfo.data,
            link,
            offset,
            serialized_sidedata,
            sidedata_offset,
        )

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes:  # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return curr

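    # Note on the ``revlogutils.entry`` call above: this commit replaces the
    # older hand-built positional index tuple, which read:
    #
    #   e = (
    #       revlogutils.offset_type(offset, flags),
    #       deltainfo.deltalen,
    #       textlen,
    #       deltainfo.base,
    #       link,
    #       p1r,
    #       p2r,
    #       node,
    #       sidedata_offset,
    #       len(serialized_sidedata),
    #       compression_mode,
    #       sidedata_compression_mode,
    #   )
    #
    # Naming each column via keyword arguments guards against column-order
    # mistakes as the index format grows new fields.
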
    def _get_data_offset(self, prev):
        """Returns the current offset in the (in-transaction) data file.
        Versions < 2 of the revlog can get this in O(1), while revlog v2 needs
        a docket file to store that information: since sidedata can be
        rewritten to the end of the data file within a transaction, you can
        have cases where, for example, rev `n` does not have sidedata while
        rev `n - 1` does, leading to `n - 1`'s sidedata being written after
        `n`'s data.

        TODO cache this in a docket file before getting out of experimental."""
        if self._docket is None:
            return self.end(prev)
        else:
            return self._docket.data_end

2642 def _writeentry(
2643 def _writeentry(
2643 self, transaction, entry, data, link, offset, sidedata, sidedata_offset
2644 self, transaction, entry, data, link, offset, sidedata, sidedata_offset
2644 ):
2645 ):
2645 # Files opened in a+ mode have inconsistent behavior on various
2646 # Files opened in a+ mode have inconsistent behavior on various
2646 # platforms. Windows requires that a file positioning call be made
2647 # platforms. Windows requires that a file positioning call be made
2647 # when the file handle transitions between reads and writes. See
2648 # when the file handle transitions between reads and writes. See
2648 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)
        ifh, dfh, sdfh = self._writinghandles
        if self._docket is None:
            ifh.seek(0, os.SEEK_END)
        else:
            ifh.seek(self._docket.index_end, os.SEEK_SET)
        if dfh:
            if self._docket is None:
                dfh.seek(0, os.SEEK_END)
            else:
                dfh.seek(self._docket.data_end, os.SEEK_SET)
        if sdfh:
            sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self._datafile, offset)
            if self._sidedatafile:
                transaction.add(self._sidedatafile, sidedata_offset)
            transaction.add(self._indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            if sidedata:
                sdfh.write(sidedata)
            ifh.write(entry)
        else:
            offset += curr * self.index.entry_size
            transaction.add(self._indexfile, offset)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            assert not sidedata
            self._enforceinlinesize(transaction)
        if self._docket is not None:
            self._docket.index_end = self._writinghandles[0].tell()
            self._docket.data_end = self._writinghandles[1].tell()
            self._docket.sidedata_end = self._writinghandles[2].tell()

        nodemaputil.setup_persistent_nodemap(transaction, self)

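As an aside on the defensive seek() discussed above: a minimal standalone sketch of the same workaround on plain Python file objects (the `append_record` helper is hypothetical, not part of revlog):

    import os
    import tempfile

    def append_record(fh, payload):
        # Reposition explicitly before appending: some platforms have been
        # observed to mishandle appends when the handle was previously
        # seeked for reading.
        fh.seek(0, os.SEEK_END)
        fh.write(payload)

    with tempfile.TemporaryFile() as fh:
        fh.write(b'abc')
        fh.seek(0)                   # handle reused for a read...
        fh.read(1)
        append_record(fh, b'def')    # ...so seek to the end before writing
        fh.seek(0)
        assert fh.read() == b'abcdef'
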
    def addgroup(
        self,
        deltas,
        linkmapper,
        transaction,
        alwayscache=False,
        addrevisioncb=None,
        duplicaterevisioncb=None,
    ):
        """
        Add a delta group.

        Given a set of deltas, add them to the revision log. The
        first delta is against its parent, which should be in our
        log; the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the revision that was added.
        """

        if self._adding_group:
            raise error.ProgrammingError(b'cannot nest addgroup() calls')

        self._adding_group = True
        empty = True
        try:
            with self._writing(transaction):
                deltacomputer = deltautil.deltacomputer(self)
                # loop through our set of deltas
                for data in deltas:
                    (
                        node,
                        p1,
                        p2,
                        linknode,
                        deltabase,
                        delta,
                        flags,
                        sidedata,
                    ) = data
                    link = linkmapper(linknode)
                    flags = flags or REVIDX_DEFAULT_FLAGS

                    rev = self.index.get_rev(node)
                    if rev is not None:
                        # this can happen if two branches make the same change
                        self._nodeduplicatecallback(transaction, rev)
                        if duplicaterevisioncb:
                            duplicaterevisioncb(self, rev)
                        empty = False
                        continue

                    for p in (p1, p2):
                        if not self.index.has_node(p):
                            raise error.LookupError(
                                p, self.radix, _(b'unknown parent')
                            )

                    if not self.index.has_node(deltabase):
                        raise error.LookupError(
                            deltabase, self.display_id, _(b'unknown delta base')
                        )

                    baserev = self.rev(deltabase)

                    if baserev != nullrev and self.iscensored(baserev):
                        # if base is censored, delta must be full replacement
                        # in a single patch operation
                        hlen = struct.calcsize(b">lll")
                        oldlen = self.rawsize(baserev)
                        newlen = len(delta) - hlen
                        if delta[:hlen] != mdiff.replacediffheader(
                            oldlen, newlen
                        ):
                            raise error.CensoredBaseError(
                                self.display_id, self.node(baserev)
                            )

                    if not flags and self._peek_iscensored(baserev, delta):
                        flags |= REVIDX_ISCENSORED

                    # We assume consumers of addrevisioncb will want to retrieve
                    # the added revision, which will require a call to
                    # revision(). revision() will fast path if there is a cache
                    # hit. So, we tell _addrevision() to always cache in this case.
                    # We're only using addgroup() in the context of changegroup
                    # generation so the revision data can always be handled as raw
                    # by the flagprocessor.
                    rev = self._addrevision(
                        node,
                        None,
                        transaction,
                        link,
                        p1,
                        p2,
                        flags,
                        (baserev, delta),
                        alwayscache=alwayscache,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

                    if addrevisioncb:
                        addrevisioncb(self, rev)
                    empty = False
        finally:
            self._adding_group = False
        return not empty

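For orientation, each item that ``addgroup`` consumes is an 8-tuple; the unpacking in the loop above fixes the shape. A hedged sketch with placeholder values standing in for real 20-byte nodes:

    # (node, p1, p2, linknode, deltabase, delta, flags, sidedata)
    node, p1, p2 = b'\x11' * 20, b'\x22' * 20, b'\x00' * 20
    linknode = b'\x33' * 20
    deltas = [
        (node, p1, p2, linknode, p1, b'<binary delta vs p1>', 0, {}),
    ]

    for data in deltas:
        node, p1, p2, linknode, deltabase, delta, flags, sidedata = data
        # a real linkmapper would translate linknode into a changelog rev here
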
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

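The censor check reduces to a bit test against the per-revision flag field. A tiny illustration (the flag value mirrors REVIDX_ISCENSORED from ``mercurial.revlogutils.flagutil``; treat the exact bit as an assumption of this example):

    REVIDX_ISCENSORED = 1 << 15  # assumed value of the censor flag bit

    censored_flags = REVIDX_ISCENSORED
    plain_flags = 0
    assert censored_flags & REVIDX_ISCENSORED
    assert not (plain_flags & REVIDX_ISCENSORED)
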
    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(
            minlink,
            len(self) - 1,
            self.headrevs(),
            self.linkrev,
            self.parentrevs,
        )

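A deliberately simplified model of what ``storageutil.resolvestripinfo`` returns (the real implementation walks the DAG down from the heads; this toy version assumes a linear history, so take it as a sketch only):

    def simple_strippoint(linkrevs, minlink):
        # first rev whose linkrev reaches minlink...
        strippoint = len(linkrevs)
        for rev, linkrev in enumerate(linkrevs):
            if linkrev >= minlink:
                strippoint = rev
                break
        # ...plus the revs at or after that point whose own (smaller)
        # linkrevs get broken as collateral damage of the truncation
        broken = {
            rev
            for rev in range(strippoint, len(linkrevs))
            if linkrevs[rev] < minlink
        }
        return strippoint, broken

    # stripping linkrev >= 3 truncates at rev 2; rev 3 (linkrev 2) breaks
    assert simple_strippoint([0, 1, 3, 2, 4], minlink=3) == (2, {3})
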
    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        data_end = self.start(rev)
        if not self._inline:
            transaction.add(self._datafile, data_end)
            end = rev * self.index.entry_size
        else:
            end = data_end + (rev * self.index.entry_size)

        if self._sidedatafile:
            sidedata_end = self.sidedata_cut_off(rev)
            transaction.add(self._sidedatafile, sidedata_end)

        transaction.add(self._indexfile, end)
        if self._docket is not None:
            # XXX we could leverage the docket while stripping. However it is
            # not powerful enough at the time of this comment
            self._docket.index_end = end
            self._docket.data_end = data_end
            self._docket.sidedata_end = sidedata_end
            self._docket.write(transaction, stripping=True)

        # then reset internal state in memory to forget those revisions
        self._revisioncache = None
        self._chaininfocache = util.lrucachedict(500)
        self._chunkclear()

        del self.index[rev:-1]

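The truncation offsets computed above follow directly from the two on-disk layouts. A small standalone sketch of the arithmetic (the 64-byte entry size matches REVLOGV1 and is an assumption of the example):

    def truncation_offsets(rev, entry_size, data_start, inline):
        # separate data file: the index holds fixed-size records and is cut
        # at rev * entry_size; the data file is cut at the first byte of
        # rev's chunk
        if not inline:
            return rev * entry_size, data_start
        # inline: index entries and data chunks share one file, so the cut
        # point is the sum of both contributions
        return data_start + rev * entry_size, None

    assert truncation_offsets(3, 64, data_start=1000, inline=False) == (192, 1000)
    assert truncation_offsets(3, 64, data_start=1000, inline=True) == (1192, None)
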
    def checksize(self):
        """Check size of index and data files

        return a (dd, di) tuple.
        - dd: extra bytes for the "data" file
        - di: extra bytes for the "index" file

        A healthy revlog will return (0, 0).
        """
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, io.SEEK_END)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self._indexfile)
            f.seek(0, io.SEEK_END)
            actual = f.tell()
            f.close()
            s = self.index.entry_size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

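Put differently, ``checksize`` compares actual file sizes against what the index implies. A standalone sketch of the healthy and torn-write cases:

    def check_sizes(index_bytes, data_bytes, entry_size, expected_data_end):
        # dd: trailing bytes in the data file beyond the last indexed chunk
        dd = data_bytes - expected_data_end
        # di: leftover bytes after the last whole index entry
        whole_entries = max(0, index_bytes // entry_size)
        di = index_bytes - whole_entries * entry_size
        return dd, di

    assert check_sizes(3 * 64, 1000, 64, expected_data_end=1000) == (0, 0)
    # a torn write left half an entry and 5 stray data bytes behind
    assert check_sizes(3 * 64 + 32, 1005, 64, expected_data_end=1000) == (5, 32)
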
    def files(self):
        res = [self._indexfile]
        if not self._inline:
            res.append(self._datafile)
        return res

    def emitrevisions(
        self,
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltamode=repository.CG_DELTAMODE_STD,
        sidedata_helpers=None,
    ):
        if nodesorder not in (b'nodes', b'storage', b'linear', None):
            raise error.ProgrammingError(
                b'unhandled value for nodesorder: %s' % nodesorder
            )

        if nodesorder is None and not self._generaldelta:
            nodesorder = b'storage'

        if (
            not self._storedeltachains
            and deltamode != repository.CG_DELTAMODE_PREV
        ):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self,
            nodes,
            nodesorder,
            revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self.candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
            sidedata_helpers=sidedata_helpers,
        )

    DELTAREUSEALWAYS = b'always'
    DELTAREUSESAMEREVS = b'samerevs'
    DELTAREUSENEVER = b'never'

    DELTAREUSEFULLADD = b'fulladd'

    DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}

    def clone(
        self,
        tr,
        destrevlog,
        addrevisioncb=None,
        deltareuse=DELTAREUSESAMEREVS,
        forcedeltabothparents=None,
        sidedata_helpers=None,
    ):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta
        encoding differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is
          the fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).
        DELTAREUSEFULLADD
          Revisions will be re-added as if they were new content. This is
          slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
          e.g. large file detection and handling.

        Delta computation can be slow, so the choice of delta reuse policy
        can significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if
        the delta could choose a better revision, it will do so. This means
        if you are converting a non-generaldelta revlog to a generaldelta
        revlog, deltas will be recomputed if the delta's parent isn't a
        parent of the revision.

        In addition to the delta policy, the ``forcedeltabothparents``
        argument controls whether to force computing deltas against both
        parents for merges. If ``None``, the destination revlog's current
        setting is kept.

        See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
        `sidedata_helpers`.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(
                _(b'value for deltareuse invalid: %s') % deltareuse
            )

        if len(destrevlog):
            raise ValueError(_(b'destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_(b'source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_(b'destination revlog has filtered revisions'))

        # lazydelta and lazydeltabase control whether to reuse a cached delta,
        # if possible.
        oldlazydelta = destrevlog._lazydelta
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSENEVER:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = False

            destrevlog._deltabothparents = forcedeltabothparents or oldamd

            self._clone(
                tr,
                destrevlog,
                addrevisioncb,
                deltareuse,
                forcedeltabothparents,
                sidedata_helpers,
            )

        finally:
            destrevlog._lazydelta = oldlazydelta
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd

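The three non-fulladd policies reduce to a pair of booleans on the destination revlog, as the ``if/elif`` chain above shows. A compact restatement of that mapping:

    # policy -> (_lazydelta, _lazydeltabase)
    DELTAREUSE_FLAGS = {
        b'always': (True, True),      # reuse the delta and its base as-is
        b'samerevs': (True, False),   # reuse the delta, revalidate the base
        b'never': (False, False),     # recompute everything
    }

    lazydelta, lazydeltabase = DELTAREUSE_FLAGS[b'samerevs']
    assert lazydelta and not lazydeltabase
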
    def _clone(
        self,
        tr,
        destrevlog,
        addrevisioncb,
        deltareuse,
        forcedeltabothparents,
        sidedata_helpers,
    ):
        """perform the core duty of `revlog.clone` after parameter processing"""
        deltacomputer = deltautil.deltacomputer(destrevlog)
        index = self.index
        for rev in self:
            entry = index[rev]

            # Some classes override linkrev to take filtered revs into
            # account. Use raw entry from index.
            flags = entry[0] & 0xFFFF
            linkrev = entry[4]
            p1 = index[entry[5]][7]
            p2 = index[entry[6]][7]
            node = entry[7]

            # (Possibly) reuse the delta from the revlog if allowed and
            # the revlog chunk is a delta.
            cachedelta = None
            rawtext = None
            if deltareuse == self.DELTAREUSEFULLADD:
                text = self._revisiondata(rev)
                sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                destrevlog.addrevision(
                    text,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    cachedelta=cachedelta,
                    node=node,
                    flags=flags,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )
            else:
                if destrevlog._lazydelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                sidedata = None
                if not cachedelta:
                    rawtext = self._revisiondata(rev)
                    sidedata = self.sidedata(rev)
                if sidedata is None:
                    sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                with destrevlog._writing(tr):
                    destrevlog._addrevision(
                        node,
                        rawtext,
                        tr,
                        linkrev,
                        p1,
                        p2,
                        flags,
                        cachedelta,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

            if addrevisioncb:
                addrevisioncb(self, rev, node)

    def censorrevision(self, tr, censornode, tombstone=b''):
        if self._format_version == REVLOGV0:
            raise error.RevlogError(
                _(b'cannot censor with version %d revlogs')
                % self._format_version
            )
        elif self._format_version == REVLOGV1:
            censor.v1_censor(self, tr, censornode, tombstone)
        else:
            # revlog v2
            raise error.RevlogError(
                _(b'cannot censor with version %d revlogs')
                % self._format_version
            )

    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_(b'index contains %d extra bytes') % di)

        version = self._format_version

        # The verifier tells us what version revlog we should be.
        if version != state[b'expectedversion']:
            yield revlogproblem(
                warning=_(b"warning: '%s' uses revlog format %d; expected %d")
                % (self.display_id, version, state[b'expectedversion'])
            )

        state[b'skipread'] = set()
        state[b'safe_renamed'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta:   file content starts with b'\1\n', the metadata
            #           header defined in filelog.py, but without a rename
            #   ext:    content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                        | common | rename | meta  | ext
            #  ---------------------------------------------------------
            #  flags()               | 0      | 0      | 0     | not 0
            #  renamed()             | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n'  | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "rawdata(rev)". "text"
            # mentioned below is "revision(rev)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see flagutil.addflagprocessor).
            #
            #               | common | rename | meta  | ext
            # -------------------------------------------------
            #  rawsize()    | L1     | L1     | L1    | L1
            #  size()       | L1     | L2-LM  | L1(*) | L1 (?)
            #  len(rawtext) | L2     | L2     | L2    | L2
            #  len(text)    | L2     | L2     | L2    | L3
            #  len(read())  | L2     | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            # 1. length check: L1 == L2, in all cases.
            # 2. hash check: depending on flag processor, we may need to
            #    use either "text" (external), or "rawtext" (in revlog).

            try:
                skipflags = state.get(b'skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                _verify_revision(self, skipflags, state, node)

                l1 = self.rawsize(rev)
                l2 = len(self.rawdata(node))

                if l1 != l2:
                    yield revlogproblem(
                        error=_(b'unpacked size is %d, %d expected') % (l2, l1),
                        node=node,
                    )

            except error.CensoredNodeError:
                if state[b'erroroncensored']:
                    yield revlogproblem(
                        error=_(b'censored file data'), node=node
                    )
                state[b'skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_(b'unpacking %s: %s')
                    % (short(node), stringutil.forcebytestr(e)),
                    node=node,
                )
                state[b'skipread'].add(node)

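The core of the loop is the length check L1 == L2. A reduced sketch of a verifier that yields problem records in the same style (``Problem`` is a stand-in for ``revlogproblem``):

    import collections

    Problem = collections.namedtuple('Problem', 'error node')

    def verify_lengths(revisions):
        # revisions: iterable of (node, rawsize-from-index, rawtext)
        for node, rawsize, rawtext in revisions:
            l1, l2 = rawsize, len(rawtext)
            if l1 != l2:
                yield Problem(
                    error=b'unpacked size is %d, %d expected' % (l2, l1),
                    node=node,
                )

    revs = [(b'n1', 3, b'abc'), (b'n2', 9, b'short')]
    assert [p.node for p in verify_lengths(revs)] == [b'n2']
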
    def storageinfo(
        self,
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
        d = {}

        if exclusivefiles:
            d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
            if not self._inline:
                d[b'exclusivefiles'].append((self.opener, self._datafile))

        if sharedfiles:
            d[b'sharedfiles'] = []

        if revisionscount:
            d[b'revisionscount'] = len(self)

        if trackedsize:
            d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d[b'storedsize'] = sum(
                self.opener.stat(path).st_size for path in self.files()
            )

        return d

    def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
        if not self.hassidedata:
            return
        # revlog formats with sidedata support do not support inline
        assert not self._inline
        if not helpers[1] and not helpers[2]:
            # Nothing to generate or remove
            return

        new_entries = []
        # append the new sidedata
        with self._writing(transaction):
            ifh, dfh, sdfh = self._writinghandles
            dfh.seek(self._docket.sidedata_end, os.SEEK_SET)

            current_offset = sdfh.tell()
            for rev in range(startrev, endrev + 1):
                entry = self.index[rev]
                new_sidedata, flags = sidedatautil.run_sidedata_helpers(
                    store=self,
                    sidedata_helpers=helpers,
                    sidedata={},
                    rev=rev,
                )

                serialized_sidedata = sidedatautil.serialize_sidedata(
                    new_sidedata
                )

                sidedata_compression_mode = COMP_MODE_INLINE
                if serialized_sidedata and self.hassidedata:
                    sidedata_compression_mode = COMP_MODE_PLAIN
                    h, comp_sidedata = self.compress(serialized_sidedata)
                    if (
                        h != b'u'
                        and comp_sidedata[0] != b'\0'
                        and len(comp_sidedata) < len(serialized_sidedata)
                    ):
                        assert not h
                        if (
                            comp_sidedata[0]
                            == self._docket.default_compression_header
                        ):
                            sidedata_compression_mode = COMP_MODE_DEFAULT
                            serialized_sidedata = comp_sidedata
                        else:
                            sidedata_compression_mode = COMP_MODE_INLINE
                            serialized_sidedata = comp_sidedata
                if entry[8] != 0 or entry[9] != 0:
                    # rewriting entries that already have sidedata is not
                    # supported yet, because it introduces garbage data in the
                    # revlog.
                    msg = b"rewriting existing sidedata is not supported yet"
                    raise error.Abort(msg)

                # Apply (potential) flags to add and to remove after running
                # the sidedata helpers
                new_offset_flags = entry[0] | flags[0] & ~flags[1]
                entry_update = (
                    current_offset,
                    len(serialized_sidedata),
                    new_offset_flags,
                    sidedata_compression_mode,
                )

                # the sidedata computation might have moved the file cursors
                # around
                sdfh.seek(current_offset, os.SEEK_SET)
                sdfh.write(serialized_sidedata)
                new_entries.append(entry_update)
                current_offset += len(serialized_sidedata)
            self._docket.sidedata_end = sdfh.tell()

            # rewrite the new index entries
            ifh.seek(startrev * self.index.entry_size)
            for i, e in enumerate(new_entries):
                rev = startrev + i
                self.index.replace_sidedata_info(rev, *e)
                packed = self.index.entry_binary(rev)
                if rev == 0 and self._docket is None:
                    header = self._format_flags | self._format_version
                    header = self.index.pack_header(header)
                    packed = header + packed
                ifh.write(packed)
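
The compression-mode selection above only keeps the compressed bytes when they are a strict win. A standalone sketch of the decision, with zlib standing in for the revlog's compressor and a zlib-style b'x' header standing in for the docket's default compression header (both are assumptions of this example):

    import zlib

    COMP_MODE_PLAIN, COMP_MODE_DEFAULT, COMP_MODE_INLINE = 0, 1, 2

    def pick_storage(blob, default_header=b'x'):
        comp = zlib.compress(blob)
        if len(comp) < len(blob):
            if comp[0:1] == default_header:
                # matches the default engine: no per-chunk header needed
                return COMP_MODE_DEFAULT, comp
            return COMP_MODE_INLINE, comp
        # compression did not pay off; store the payload as-is
        return COMP_MODE_PLAIN, blob

    mode, stored = pick_storage(b'abcabcabc' * 100)
    assert mode == COMP_MODE_DEFAULT and len(stored) < 900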