revlog: document the `is_inline_index` method...
marmoute
r51895:4ea60656 default
@@ -1,3529 +1,3530 @@
# revlog.py - storage back-end for mercurial
# coding: utf8
#
# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""


import binascii
import collections
import contextlib
import io
import os
import struct
import weakref
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullrev,
    sha1nodeconstants,
    short,
    wdirrev,
)
from .i18n import _
from .revlogutils.constants import (
    ALL_KINDS,
    CHANGELOGV2,
    COMP_MODE_DEFAULT,
    COMP_MODE_INLINE,
    COMP_MODE_PLAIN,
    DELTA_BASE_REUSE_NO,
    DELTA_BASE_REUSE_TRY,
    ENTRY_RANK,
    FEATURES_BY_VERSION,
    FLAG_GENERALDELTA,
    FLAG_INLINE_DATA,
    INDEX_HEADER,
    KIND_CHANGELOG,
    KIND_FILELOG,
    RANK_UNKNOWN,
    REVLOGV0,
    REVLOGV1,
    REVLOGV1_FLAGS,
    REVLOGV2,
    REVLOGV2_FLAGS,
    REVLOG_DEFAULT_FLAGS,
    REVLOG_DEFAULT_FORMAT,
    REVLOG_DEFAULT_VERSION,
    SUPPORTED_FLAGS,
)
from .revlogutils.flagutil import (
    REVIDX_DEFAULT_FLAGS,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_FLAGS_ORDER,
    REVIDX_HASCOPIESINFO,
    REVIDX_ISCENSORED,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
)
from .thirdparty import attr
from . import (
    ancestor,
    dagop,
    error,
    mdiff,
    policy,
    pycompat,
    revlogutils,
    templatefilters,
    util,
)
from .interfaces import (
    repository,
    util as interfaceutil,
)
from .revlogutils import (
    deltas as deltautil,
    docket as docketutil,
    flagutil,
    nodemap as nodemaputil,
    randomaccessfile,
    revlogv0,
    rewrite,
    sidedata as sidedatautil,
)
from .utils import (
    storageutil,
    stringutil,
)

# blanked usage of all the names to prevent pyflakes constraints
# We need these names available in the module for extensions.

REVLOGV0
REVLOGV1
REVLOGV2
CHANGELOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_HASCOPIESINFO
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_RAWTEXT_CHANGING_FLAGS

parsers = policy.importmod('parsers')
rustancestor = policy.importrust('ancestor')
rustdagop = policy.importrust('dagop')
rustrevlog = policy.importrust('revlog')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of inline data embedded into a revlog
_maxinline = 131072
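# (Added note, not part of this change: 131072 bytes is 128 KiB. Code
# elsewhere in revlog.py is expected to migrate a revlog whose inline data
# outgrows this limit into a separate index (.i) / data (.d) file pair.)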

# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    return text, False


def ellipsiswriteprocessor(rl, text):
    return text, False


def ellipsisrawprocessor(rl, text):
    return False


ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)
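
# Registration sketch (an added illustration, not part of this change): a
# processor is a (read, write, raw) triple; judging from the functions above,
# read/write return a (text, validatehash) pair while raw returns whether the
# rawtext is suitable for hash verification. Extensions register a triple the
# same way the `enableellipsis` handling does below:
#
#     flagutil.insertflagprocessor(
#         REVIDX_ELLIPSIS, ellipsisprocessor, flagutil.flagprocessors
#     )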


def _verify_revision(rl, skipflags, state, node):
    """Verify the integrity of the given revlog ``node`` while providing a hook
    point for extensions to influence the operation."""
    if skipflags:
        state[b'skipread'].add(node)
    else:
        # Side-effect: read content and verify hash.
        rl.revision(node)


# True if a fast implementation for persistent-nodemap is available
#
# We also consider we have a "fast" implementation in "pure" python because
# people using pure don't really have performance considerations (and a
# wheelbarrow of other slowness sources)
HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or hasattr(
    parsers, 'BaseIndexObject'
)


@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta:
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    sidedata = attr.ib()
    protocol_flags = attr.ib()
    linknode = attr.ib(default=None)


@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem:
    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)


def parse_index_v1(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline)
    return index, cache


def parse_index_v2(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline, format=REVLOGV2)
    return index, cache


def parse_index_cl_v2(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline, format=CHANGELOGV2)
    return index, cache


if hasattr(parsers, 'parse_index_devel_nodemap'):

    def parse_index_v1_nodemap(data, inline):
        index, cache = parsers.parse_index_devel_nodemap(data, inline)
        return index, cache


else:
    parse_index_v1_nodemap = None


def parse_index_v1_mixed(data, inline):
    index, cache = parse_index_v1(data, inline)
    return rustrevlog.MixedIndex(index), cache


# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7FFFFFFF

FILE_TOO_SHORT_MSG = _(
    b'cannot read from revlog %s;'
    b' expected %d bytes from offset %d, data size is %d'
)

hexdigits = b'0123456789abcdefABCDEF'


class revlog:
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.

    If `upperboundcomp` is not None, this is the expected maximal gain from
    compression for the data content.

    `concurrencychecker` is an optional function that receives 3 arguments: a
    file handle, a filename, and an expected position. It should check whether
    the current position in the file handle is valid, and log/warn/fail (by
    raising).

    See mercurial/revlogutils/constants.py for details about the content of an
    index entry.
    """

    _flagserrorclass = error.RevlogError

    @staticmethod
    def is_inline_index(header_bytes):
        """Determine if a revlog is inline from the initial bytes of the index"""
        header = INDEX_HEADER.unpack(header_bytes)[0]

        _format_flags = header & ~0xFFFF
        _format_version = header & 0xFFFF

        features = FEATURES_BY_VERSION[_format_version]
        return features[b'inline'](_format_flags)
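
    # Illustrative doctest-style sketch (added, not part of this change): the
    # 4-byte header packs the format version in the low 16 bits and feature
    # flags in the high bits. Assuming the usual constant values
    # (REVLOGV1 == 1, FLAG_INLINE_DATA == 1 << 16):
    #
    #     >>> import struct
    #     >>> header_bytes = struct.pack(b'>I', FLAG_INLINE_DATA | REVLOGV1)
    #     >>> bool(revlog.is_inline_index(header_bytes))
    #     True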

    def __init__(
        self,
        opener,
        target,
        radix,
        postfix=None,  # only exists for `tmpcensored` now
        checkambig=False,
        mmaplargeindex=False,
        censorable=False,
        upperboundcomp=None,
        persistentnodemap=False,
        concurrencychecker=None,
        trypending=False,
        try_split=False,
        canonical_parent_order=True,
    ):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.

        `target`: a (KIND, ID) tuple that identifies the content stored in
        this revlog. It helps the rest of the code understand what the revlog
        is about without having to resort to heuristics and index filename
        analysis. Note that this must reliably be set by normal code, but
        that test, debug, or performance measurement code might not set this
        to an accurate value.
        """
        self.upperboundcomp = upperboundcomp

        self.radix = radix

        self._docket_file = None
        self._indexfile = None
        self._datafile = None
        self._sidedatafile = None
        self._nodemap_file = None
        self.postfix = postfix
        self._trypending = trypending
        self._try_split = try_split
        self.opener = opener
        if persistentnodemap:
            self._nodemap_file = nodemaputil.get_nodemap_file(self)

        assert target[0] in ALL_KINDS
        assert len(target) == 2
        self.target = target
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        self._mmaplargeindex = mmaplargeindex
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._revisioncache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, b'')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._deltabothparents = True
        self._candidate_group_chunk_size = 0
        self._debug_delta = False
        self.index = None
        self._docket = None
        self._nodemap_docket = None
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._compengine = b'zlib'
        self._compengineopts = {}
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._sparserevlog = False
        self.hassidedata = False
        self._srdensitythreshold = 0.50
        self._srmingapsize = 262144

        # other optional features

        # might remove rank configuration once the computation has no impact
        self._compute_rank = False

        # Make copy of flag processors so each revlog instance can support
        # custom flags.
        self._flagprocessors = dict(flagutil.flagprocessors)

        # 3-tuple of file handles being used for active writing.
        self._writinghandles = None
        # prevent nesting of addgroup
        self._adding_group = None

        self._loadindex()

        self._concurrencychecker = concurrencychecker

        # parent order is supposed to be semantically irrelevant, so we
        # normally resort parents to ensure that the first parent is non-null,
        # if there is a non-null parent at all.
        # filelog abuses the parent order as flag to mark some instances of
        # meta-encoded files, so allow it to disable this behavior.
        self.canonical_parent_order = canonical_parent_order

    def _init_opts(self):
        """process options (from above/config) to set up the associated default revlog mode

        These values might be affected when actually reading on-disk information.

        The relevant values are returned for use in _loadindex().

        * newversionflags:
            version header to use if we need to create a new revlog

        * mmapindexthreshold:
            minimal index size at which to start using mmap

        * force_nodemap:
            force the usage of a "development" version of the nodemap code
        """
        mmapindexthreshold = None
        opts = self.opener.options

        if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
            new_header = CHANGELOGV2
            self._compute_rank = opts.get(b'changelogv2.compute-rank', True)
        elif b'revlogv2' in opts:
            new_header = REVLOGV2
        elif b'revlogv1' in opts:
            new_header = REVLOGV1 | FLAG_INLINE_DATA
            if b'generaldelta' in opts:
                new_header |= FLAG_GENERALDELTA
        elif b'revlogv0' in self.opener.options:
            new_header = REVLOGV0
        else:
            new_header = REVLOG_DEFAULT_VERSION

        if b'chunkcachesize' in opts:
            self._chunkcachesize = opts[b'chunkcachesize']
        if b'maxchainlen' in opts:
            self._maxchainlen = opts[b'maxchainlen']
        if b'deltabothparents' in opts:
            self._deltabothparents = opts[b'deltabothparents']
        dps_cgds = opts.get(b'delta-parent-search.candidate-group-chunk-size')
        if dps_cgds:
            self._candidate_group_chunk_size = dps_cgds
        self._lazydelta = bool(opts.get(b'lazydelta', True))
        self._lazydeltabase = False
        if self._lazydelta:
            self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
        if b'debug-delta' in opts:
            self._debug_delta = opts[b'debug-delta']
        if b'compengine' in opts:
            self._compengine = opts[b'compengine']
        if b'zlib.level' in opts:
            self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
        if b'zstd.level' in opts:
            self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
        if b'maxdeltachainspan' in opts:
            self._maxdeltachainspan = opts[b'maxdeltachainspan']
        if self._mmaplargeindex and b'mmapindexthreshold' in opts:
            mmapindexthreshold = opts[b'mmapindexthreshold']
        self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
        withsparseread = bool(opts.get(b'with-sparse-read', False))
        # sparse-revlog forces sparse-read
        self._withsparseread = self._sparserevlog or withsparseread
        if b'sparse-read-density-threshold' in opts:
            self._srdensitythreshold = opts[b'sparse-read-density-threshold']
        if b'sparse-read-min-gap-size' in opts:
            self._srmingapsize = opts[b'sparse-read-min-gap-size']
        if opts.get(b'enableellipsis'):
            self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

        # revlog v0 doesn't have flag processors
        for flag, processor in opts.get(b'flagprocessors', {}).items():
            flagutil.insertflagprocessor(flag, processor, self._flagprocessors)

        if self._chunkcachesize <= 0:
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not greater than 0')
                % self._chunkcachesize
            )
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not a power of 2')
                % self._chunkcachesize
            )
        force_nodemap = opts.get(b'devel-force-nodemap', False)
        return new_header, mmapindexthreshold, force_nodemap

    def _get_data(self, filepath, mmap_threshold, size=None):
        """return a file content with or without mmap

        If the file is missing return the empty string"""
        try:
            with self.opener(filepath) as fp:
                if mmap_threshold is not None:
                    file_size = self.opener.fstat(fp).st_size
                    if file_size >= mmap_threshold:
                        if size is not None:
                            # avoid a potential mmap crash
                            size = min(file_size, size)
                        # TODO: should .close() to release resources without
                        # relying on Python GC
                        if size is None:
                            return util.buffer(util.mmapread(fp))
                        else:
                            return util.buffer(util.mmapread(fp, size))
                if size is None:
                    return fp.read()
                else:
                    return fp.read(size)
        except FileNotFoundError:
            return b''

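    # Cross-reference (added note, not part of this change): _loadindex()
    # below reads the index entry point through this helper and gets b''
    # back when the revlog does not exist yet:
    #
    #     entry_data = self._get_data(entry_point, mmapindexthreshold)
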
    def get_streams(self, max_linkrev, force_inline=False):
        n = len(self)
        index = self.index
        while n > 0:
            linkrev = index[n - 1][4]
            if linkrev < max_linkrev:
                break
            # note: this loop will rarely go through multiple iterations, since
            # it only traverses commits created during the current streaming
            # pull operation.
            #
            # If this becomes a problem, using a binary search should cap the
            # runtime of this.
            n = n - 1
        if n == 0:
            # no data to send
            return []
        index_size = n * index.entry_size
        data_size = self.end(n - 1)

        # XXX we might have been split (or stripped) since the object was
        # initialized. We need to close this race too, e.g. by having a way to
        # pre-open the files we feed to the revlog and never closing them
        # before we are done streaming.

        if self._inline:

            def get_stream():
                with self._indexfp() as fp:
                    yield None
                    size = index_size + data_size
                    if size <= 65536:
                        yield fp.read(size)
                    else:
                        yield from util.filechunkiter(fp, limit=size)

            inline_stream = get_stream()
            next(inline_stream)
            return [
                (self._indexfile, inline_stream, index_size + data_size),
            ]
        elif force_inline:

            def get_stream():
                with self._datafp() as fp_d:
                    yield None

                    for rev in range(n):
                        idx = self.index.entry_binary(rev)
                        if rev == 0 and self._docket is None:
                            # re-inject the inline flag
                            header = self._format_flags
                            header |= self._format_version
                            header |= FLAG_INLINE_DATA
                            header = self.index.pack_header(header)
                            idx = header + idx
                        yield idx
                        yield self._getsegmentforrevs(rev, rev, df=fp_d)[1]

            inline_stream = get_stream()
            next(inline_stream)
            return [
                (self._indexfile, inline_stream, index_size + data_size),
            ]
        else:

            def get_index_stream():
                with self._indexfp() as fp:
                    yield None
                    if index_size <= 65536:
                        yield fp.read(index_size)
                    else:
                        yield from util.filechunkiter(fp, limit=index_size)

            def get_data_stream():
                with self._datafp() as fp:
                    yield None
                    if data_size <= 65536:
                        yield fp.read(data_size)
                    else:
                        yield from util.filechunkiter(fp, limit=data_size)

            index_stream = get_index_stream()
            next(index_stream)
            data_stream = get_data_stream()
            next(data_stream)
            return [
                (self._datafile, data_stream, data_size),
                (self._indexfile, index_stream, index_size),
            ]

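    # Consumption sketch (added illustration, not part of this change): each
    # triple returned by get_streams() is (filename, stream, size), and each
    # generator has already been advanced past its initial `yield None`, so a
    # caller can simply iterate:
    #
    #     for name, stream, size in rl.get_streams(max_linkrev):
    #         for chunk in stream:
    #             consume(chunk)
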
    def _loadindex(self, docket=None):

        new_header, mmapindexthreshold, force_nodemap = self._init_opts()

        if self.postfix is not None:
            entry_point = b'%s.i.%s' % (self.radix, self.postfix)
        elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
            entry_point = b'%s.i.a' % self.radix
        elif self._try_split and self.opener.exists(self._split_index_file):
            entry_point = self._split_index_file
        else:
            entry_point = b'%s.i' % self.radix

        if docket is not None:
            self._docket = docket
            self._docket_file = entry_point
        else:
            self._initempty = True
            entry_data = self._get_data(entry_point, mmapindexthreshold)
            if len(entry_data) > 0:
                header = INDEX_HEADER.unpack(entry_data[:4])[0]
                self._initempty = False
            else:
                header = new_header

            self._format_flags = header & ~0xFFFF
            self._format_version = header & 0xFFFF

            supported_flags = SUPPORTED_FLAGS.get(self._format_version)
            if supported_flags is None:
                msg = _(b'unknown version (%d) in revlog %s')
                msg %= (self._format_version, self.display_id)
                raise error.RevlogError(msg)
            elif self._format_flags & ~supported_flags:
                msg = _(b'unknown flags (%#04x) in version %d revlog %s')
                display_flag = self._format_flags >> 16
                msg %= (display_flag, self._format_version, self.display_id)
                raise error.RevlogError(msg)

            features = FEATURES_BY_VERSION[self._format_version]
            self._inline = features[b'inline'](self._format_flags)
            self._generaldelta = features[b'generaldelta'](self._format_flags)
            self.hassidedata = features[b'sidedata']

            if not features[b'docket']:
                self._indexfile = entry_point
                index_data = entry_data
            else:
                self._docket_file = entry_point
                if self._initempty:
                    self._docket = docketutil.default_docket(self, header)
                else:
                    self._docket = docketutil.parse_docket(
                        self, entry_data, use_pending=self._trypending
                    )

        if self._docket is not None:
            self._indexfile = self._docket.index_filepath()
            index_data = b''
            index_size = self._docket.index_end
            if index_size > 0:
                index_data = self._get_data(
                    self._indexfile, mmapindexthreshold, size=index_size
                )
                if len(index_data) < index_size:
                    msg = _(b'too few index data for %s: got %d, expected %d')
                    msg %= (self.display_id, len(index_data), index_size)
                    raise error.RevlogError(msg)

            self._inline = False
            # generaldelta implied by version 2 revlogs.
            self._generaldelta = True
            # the logic for persistent nodemap will be dealt with within the
            # main docket, so disable it for now.
            self._nodemap_file = None

        if self._docket is not None:
            self._datafile = self._docket.data_filepath()
            self._sidedatafile = self._docket.sidedata_filepath()
        elif self.postfix is None:
            self._datafile = b'%s.d' % self.radix
        else:
            self._datafile = b'%s.d.%s' % (self.radix, self.postfix)

        self.nodeconstants = sha1nodeconstants
        self.nullid = self.nodeconstants.nullid

        # sparse-revlog can't be on without general-delta (issue6056)
        if not self._generaldelta:
            self._sparserevlog = False

        self._storedeltachains = True

        devel_nodemap = (
            self._nodemap_file
            and force_nodemap
            and parse_index_v1_nodemap is not None
        )

        use_rust_index = False
        if rustrevlog is not None:
            if self._nodemap_file is not None:
                use_rust_index = True
            else:
                use_rust_index = self.opener.options.get(b'rust.index')

        self._parse_index = parse_index_v1
        if self._format_version == REVLOGV0:
            self._parse_index = revlogv0.parse_index_v0
        elif self._format_version == REVLOGV2:
            self._parse_index = parse_index_v2
        elif self._format_version == CHANGELOGV2:
            self._parse_index = parse_index_cl_v2
        elif devel_nodemap:
            self._parse_index = parse_index_v1_nodemap
        elif use_rust_index:
            self._parse_index = parse_index_v1_mixed
        try:
            d = self._parse_index(index_data, self._inline)
            index, chunkcache = d
            use_nodemap = (
                not self._inline
                and self._nodemap_file is not None
                and hasattr(index, 'update_nodemap_data')
            )
            if use_nodemap:
                nodemap_data = nodemaputil.persisted_data(self)
                if nodemap_data is not None:
                    docket = nodemap_data[0]
                    if (
                        len(d[0]) > docket.tip_rev
                        and d[0][docket.tip_rev][7] == docket.tip_node
                    ):
                        # no changelog tampering
                        self._nodemap_docket = docket
                        index.update_nodemap_data(*nodemap_data)
        except (ValueError, IndexError):
            raise error.RevlogError(
                _(b"index %s is corrupted") % self.display_id
            )
        self.index = index
        self._segmentfile = randomaccessfile.randomaccessfile(
            self.opener,
            (self._indexfile if self._inline else self._datafile),
            self._chunkcachesize,
            chunkcache,
        )
        self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
            self.opener,
            self._sidedatafile,
            self._chunkcachesize,
        )
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = util.lrucachedict(500)
        # revlog header -> revlog compressor
        self._decompressors = {}

    def get_revlog(self):
        """simple function to mirror the API of other not-really-revlog APIs"""
        return self

    @util.propertycache
    def revlog_kind(self):
        return self.target[0]

    @util.propertycache
    def display_id(self):
        """The public facing "ID" of the revlog that we use in messages"""
        if self.revlog_kind == KIND_FILELOG:
            # Reference the file without the "data/" prefix, so it is familiar
            # to the user.
            return self.target[1]
        else:
            return self.radix

    def _get_decompressor(self, t):
        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor(self._compengineopts)
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(
                    _(b'unknown compression type %s') % binascii.hexlify(t)
                )
        return compressor

    @util.propertycache
    def _compressor(self):
        engine = util.compengines[self._compengine]
        return engine.revlogcompressor(self._compengineopts)

    @util.propertycache
    def _decompressor(self):
        """the default decompressor"""
        if self._docket is None:
            return None
        t = self._docket.default_compression_header
        c = self._get_decompressor(t)
        return c.decompress

    def _indexfp(self):
        """file object for the revlog's index file"""
        return self.opener(self._indexfile, mode=b"r")

    def __index_write_fp(self):
        # You should not use this directly; use `_writing` instead
        try:
            f = self.opener(
                self._indexfile, mode=b"r+", checkambig=self._checkambig
            )
            if self._docket is None:
                f.seek(0, os.SEEK_END)
            else:
                f.seek(self._docket.index_end, os.SEEK_SET)
            return f
        except FileNotFoundError:
            return self.opener(
                self._indexfile, mode=b"w+", checkambig=self._checkambig
            )

    def __index_new_fp(self):
        # You should not use this unless you are upgrading from an inline revlog
        return self.opener(
            self._indexfile,
            mode=b"w",
            checkambig=self._checkambig,
            atomictemp=True,
        )

    def _datafp(self, mode=b'r'):
        """file object for the revlog's data file"""
        return self.opener(self._datafile, mode=mode)

    @contextlib.contextmanager
    def _sidedatareadfp(self):
        """file object suitable to read sidedata"""
        if self._writinghandles:
            yield self._writinghandles[2]
        else:
            with self.opener(self._sidedatafile) as fp:
                yield fp

    def tiprev(self):
        return len(self.index) - 1

    def tip(self):
        return self.node(self.tiprev())

    def __contains__(self, rev):
        return 0 <= rev < len(self)

    def __len__(self):
        return len(self.index)

    def __iter__(self):
        return iter(range(len(self)))

    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        return storageutil.iterrevs(len(self), start=start, stop=stop)

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such a flag processor can alter
        # the rawtext content that the delta will be based on, and two clients
        # could have the same revlog node with different flags (i.e. different
        # rawtext contents) and the delta could be incompatible.
        if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
            self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
        ):
            return False
        return True

    def update_caches(self, transaction):
        if self._nodemap_file is not None:
            if transaction is None:
                nodemaputil.update_persistent_nodemap(self)
            else:
                nodemaputil.setup_persistent_nodemap(transaction, self)

    def clearcaches(self):
        self._revisioncache = None
        self._chainbasecache.clear()
        self._segmentfile.clear_cache()
        self._segmentfile_sidedata.clear_cache()
        self._pcache = {}
        self._nodemap_docket = None
        self.index.clearcaches()
        # The python code is the one responsible for validating the docket, so
        # we end up having to refresh it here.
910 use_nodemap = (
911 use_nodemap = (
911 not self._inline
912 not self._inline
912 and self._nodemap_file is not None
913 and self._nodemap_file is not None
913 and hasattr(self.index, 'update_nodemap_data')
914 and hasattr(self.index, 'update_nodemap_data')
914 )
915 )
915 if use_nodemap:
916 if use_nodemap:
916 nodemap_data = nodemaputil.persisted_data(self)
917 nodemap_data = nodemaputil.persisted_data(self)
917 if nodemap_data is not None:
918 if nodemap_data is not None:
918 self._nodemap_docket = nodemap_data[0]
919 self._nodemap_docket = nodemap_data[0]
919 self.index.update_nodemap_data(*nodemap_data)
920 self.index.update_nodemap_data(*nodemap_data)
920
921
    def rev(self, node):
        try:
            return self.index.rev(node)
        except TypeError:
            raise
        except error.RevlogError:
            # parsers.c radix tree lookup failed
            if (
                node == self.nodeconstants.wdirid
                or node in self.nodeconstants.wdirfilenodeids
            ):
                raise error.WdirUnsupported
            raise error.LookupError(node, self.display_id, _(b'no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def sidedata_cut_off(self, rev):
        sd_cut_off = self.index[rev][8]
        if sd_cut_off != 0:
            return sd_cut_off
        # This is an annoying dance, because entries without sidedata
        # currently use 0 as their offset (instead of previous-offset +
        # previous-size).
        #
        # We should reconsider this "no sidedata → 0 sidedata_offset" policy.
        # In the meantime, we need this.
        while 0 <= rev:
            e = self.index[rev]
            if e[9] != 0:
                return e[8] + e[9]
            rev -= 1
        return 0

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]

    def sidedata_length(self, rev):
        if not self.hassidedata:
            return 0
        return self.index[rev][9]

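    # Worked example of the packing that ``start`` and ``flags`` unpack
    # (the values are illustrative, not from a real revlog):
    #
    #   entry0 = (1234 << 16) | 0x0001   # offset 1234, one flag bit set
    #   entry0 >> 16       # -> 1234, what start() returns
    #   entry0 & 0xFFFF    # -> 0x0001, what flags() returns
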
    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.rawdata(rev)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev))

    def fast_rank(self, rev):
        """Return the rank of a revision if already known, or None otherwise.

        The rank of a revision is the size of the sub-graph it defines as a
        head. Equivalently, the rank of a revision `r` is the size of the set
        `ancestors(r)`, `r` included.

        This method returns the rank retrieved from the revlog in constant
        time. It makes no attempt at computing unknown values for versions of
        the revlog which do not persist the rank.
        """
        rank = self.index[rev][ENTRY_RANK]
        if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
            return None
        if rev == nullrev:
            return 0  # convention
        return rank

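    # Example: in a purely linear history, ``ancestors(r)`` (including ``r``
    # itself) has ``r + 1`` elements, so ``fast_rank(r)`` would return
    # ``r + 1`` whenever the rank is persisted (CHANGELOGV2 only).
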
    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

        if self.canonical_parent_order and entry[5] == nullrev:
            return entry[6], entry[5]
        else:
            return entry[5], entry[6]

    # fast parentrevs(rev) where rev isn't filtered
    _uncheckedparentrevs = parentrevs

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

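    # Round-trip sketch between revision numbers and binary node ids
    # (illustrative; assumes ``rl`` is a non-empty revlog):
    #
    #   n = rl.node(0)          # binary node id of the first revision
    #   assert rl.rev(n) == 0   # rev() inverts node()
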
    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        # inline node() to avoid function call overhead
        # (d[5] and d[6] are parent *revisions*, so compare against nullrev,
        # not against the binary nullid)
        if self.canonical_parent_order and d[5] == nullrev:
            return i[d[6]][7], i[d[5]][7]
        else:
            return i[d[5]][7], i[d[6]][7]

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

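    # Example reading of the value returned above (illustrative): for a rev
    # whose delta chain is base -> d1 -> d2 -> rev, _chaininfo returns
    # (3, compressed size of rev + d2 + d1 + the base text), i.e. how many
    # deltas must be applied and how many bytes must be decompressed to
    # reconstruct the revision.
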
    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped

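    # Sketch of the two layouts the pure-Python walk above handles
    # (revision numbers are illustrative):
    #
    #   generaldelta: each entry records its delta base explicitly, e.g.
    #                 7 deltas against 4, 4 against 2, and 2 is stored as a
    #                 full text, so _deltachain(7) == ([2, 4, 7], False)
    #   legacy:       each entry deltas against rev - 1, so the walk simply
    #                 decrements until it reaches the chain base
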
    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse revision order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        # first, make sure start revisions aren't filtered
        revs = list(revs)
        checkrev = self.node
        for r in revs:
            checkrev(r)
        # and we're sure ancestors aren't filtered as well

        if rustancestor is not None and self.index.rust_ext_compat:
            lazyancestors = rustancestor.LazyAncestors
            arg = self.index
        else:
            lazyancestors = ancestor.lazyancestors
            arg = self._uncheckedparentrevs
        return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)

    def descendants(self, revs):
        return dagop.descendantrevs(revs, self.revs, self.parentrevs)

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset:
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        if rustancestor is not None and self.index.rust_ext_compat:
            return rustancestor.MissingAncestors(self.index, common)
        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullrev."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

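    # Usage sketch (illustrative; ``rl`` is a populated revlog and the node
    # variables are made up): compute the nodes reachable from ``h`` that
    # are not reachable from either of ``c1``/``c2``, e.g. when building an
    # outgoing set during discovery:
    #
    #   missing = rl.findmissing(common=[c1, c2], heads=[h])
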
    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [self.nullid]  # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return (
                [self.node(r) for r in self],
                [self.nullid],
                list(self.heads()),
            )
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == self.nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n)  # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update(
                            [p for p in self.parents(n) if p != self.nullid]
                        )
                    elif n in heads:  # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [self.nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the descendants set with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in heads.items() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

    def headrevs(self, revs=None):
        if revs is None:
            try:
                return self.index.headrevs()
            except AttributeError:
                return self._headrevs()
        if rustdagop is not None and self.index.rust_ext_compat:
            return rustdagop.headrevs(self.index, revs)
        return dagop.headrevs(revs, self._uncheckedparentrevs)

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]

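    # Worked example for the marking pass above: with revisions 0..3 where
    # 1 and 2 are both children of 0 and 3 is a child of 1, every rev starts
    # marked as a candidate head and is unmarked once it appears as a
    # parent, leaving [2, 3].
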
    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [self.nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = {self.rev(n) for n in stop or []}

        revs = dagop.headrevssubset(
            self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
        )

        return [self.node(rev) for rev in revs]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError):  # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        reachableroots is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return bool(self.reachableroots(a, [b], [a], includepath=False))

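    # Note on the ``a > b`` shortcut above: revision numbers are a
    # topological order, so an ancestor always has a lower revision number
    # than its descendants; a higher-numbered rev can therefore never be an
    # ancestor of a lower-numbered one.
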
    def reachableroots(self, minroot, heads, roots, includepath=False):
        """return (heads(::(<roots> and <roots>::<heads>)))

        If includepath is True, return (<roots>::<heads>)."""
        try:
            return self.index.reachableroots2(
                minroot, heads, roots, includepath
            )
        except AttributeError:
            return dagop._reachablerootspure(
                self.parentrevs, minroot, roots, heads, includepath
            )

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return self.nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == self.nodeconstants.nodelen:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node)  # quick search the index
                return node
            except error.LookupError:
                pass  # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if b"%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 2 * self.nodeconstants.nodelen:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (binascii.Error, error.LookupError):
                pass

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be full
        # hashes
        maybewdir = self.nodeconstants.wdirhex.startswith(id)
        ambiguous = False
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    ambiguous = True
                else:
                    return partial
            elif maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            else:
                return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                ambiguous = True
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key is not hex
            pass
        if ambiguous:
            raise error.AmbiguousPrefixLookupError(
                id, self.display_id, _(b'ambiguous identifier')
            )

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            # hex(node)[:...]
            l = len(id) // 2 * 2  # grab an even number of digits
            try:
                # we're dropping the last digit, so let's check that it's hex,
                # to avoid the expensive computation below if it's not
                if len(id) % 2 > 0:
                    if id[-1] not in hexdigits:
                        return None
                prefix = bin(id[:l])
            except binascii.Error:
                pass
            else:
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [
                    n for n in nl if hex(n).startswith(id) and self.hasnode(n)
                ]
                if self.nodeconstants.nullhex.startswith(id):
                    nl.append(self.nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.display_id, _(b'ambiguous identifier')
                    )
                if maybewdir:
                    raise error.WdirUnsupported
                return None

    def lookup(self, id):
        """locate a node based on:
        - revision number or str(revision number)
        - nodeid or prefix of the hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.display_id, _(b'no match found'))

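    # Usage sketch (illustrative; ``rl`` is a populated revlog and the hex
    # prefix is made up):
    #
    #   rl.lookup(b'0')           # by revision number
    #   rl.lookup(b'1a2b3c')      # by unambiguous hex prefix
    #   rl.lookup(rl.node(0))     # by binary node id
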
    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""

        def isvalid(prefix):
            try:
                matchednode = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if matchednode is None:
                raise error.LookupError(node, self.display_id, _(b'no node'))
            return True

        def maybewdir(prefix):
            return all(c == b'f' for c in pycompat.iterbytestr(prefix))

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != self.nodeconstants.wdirid:
                    raise error.LookupError(
                        node, self.display_id, _(b'no node')
                    )
            except AttributeError:
                # Fall through to pure code
                pass

        if node == self.nodeconstants.wdirid:
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, len(hexnode) + 1):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

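    # Note: ``cmp`` never reads the stored text. It recomputes the SHA-1 of
    # ``text`` salted with the parent nodes and compares the result to
    # ``node``; that hash is how revlog node ids are defined, so a mismatch
    # means the content differs.
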
    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self.index.entry_size
            end += (endrev + 1) * self.index.entry_size
        length = end - start

        return start, self._segmentfile.read_chunk(start, length, df)

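    # Worked example of the inline adjustment above (illustrative numbers):
    # in an inline revlog the data of rev r is preceded by r + 1 index
    # entries, so with revlogv1's 64-byte entries the data of rev 2 sits
    # 3 * 64 = 192 bytes further into the file than it would in a split
    # revlog.
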
    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        compression_mode = self.index[rev][10]
        data = self._getsegmentforrevs(rev, rev, df=df)[1]
        if compression_mode == COMP_MODE_PLAIN:
            return data
        elif compression_mode == COMP_MODE_DEFAULT:
            return self._decompressor(data)
        elif compression_mode == COMP_MODE_INLINE:
            return self.decompress(data)
        else:
            msg = b'unknown compression mode %d'
            msg %= compression_mode
            raise error.RevlogError(msg)

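    # The three compression modes dispatched above, in brief:
    #
    #   COMP_MODE_PLAIN   - chunk stored uncompressed, returned as-is
    #   COMP_MODE_DEFAULT - decompressed with the revlog-level default
    #                       decompressor (no per-chunk header)
    #   COMP_MODE_INLINE  - chunk carries its own engine marker and is
    #                       routed through self.decompress()
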
    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self.index.entry_size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(
                self, revs, targetsize=targetsize
            )

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            # self._decompressor might be None, but will not be used in that
            # case
            def_decomp = self._decompressor
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                comp_mode = self.index[rev][10]
                c = buffer(data, chunkstart - offset, chunklength)
                if comp_mode == COMP_MODE_PLAIN:
                    ladd(c)
                elif comp_mode == COMP_MODE_INLINE:
                    ladd(decomp(c))
                elif comp_mode == COMP_MODE_DEFAULT:
                    ladd(def_decomp(c))
                else:
                    msg = b'unknown compression mode %d'
                    msg %= comp_mode
                    raise error.RevlogError(msg)

        return l

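    # Reading pattern (a sketch of what the loop above does): each slice
    # produced by deltautil.slicechunk is fetched with a single contiguous
    # read, then the per-revision chunks are cut out of that buffer as
    # zero-copy ``util.buffer`` views:
    #
    #   for revschunk in slicedchunks:      # densely packed runs of revs
    #       # one read covering the whole run
    #       offset, data = self._getsegmentforrevs(first, last)
    #       for rev in revschunk:
    #           chunk = buffer(data, start(rev) - offset, length(rev))
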
    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

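    # In other words: entry[3] stores the delta base, and a base equal to the
    # revision itself marks a full snapshot (no delta parent). With
    # general-delta the stored base is authoritative; in the older layout a
    # delta always applies against the previous revision. For example:
    #
    #   generaldelta:  rev 7 may delta against rev 3 (base stored as 3)
    #   legacy:        rev 7 always deltas against rev 6
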
    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        if not self._sparserevlog:
            return self.deltaparent(rev) == nullrev
        elif hasattr(self.index, 'issnapshot'):
            # directly assign the method to cache the testing and access
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        while self.length(p1) == 0:
            b = self.deltaparent(p1)
            if b == p1:
                break
            p1 = b
        p2 = entry[6]
        while self.length(p2) == 0:
            b = self.deltaparent(p2)
            if b == p2:
                break
            p2 = b
        if base == p1 or base == p2:
            return False
        return self.issnapshot(base)

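    # Summary of the logic above: outside sparse-revlog only full texts count
    # as snapshots. With sparse-revlog a revision is also a snapshot when it
    # deltas against another snapshot that is *not* one of its parents (an
    # "intermediate snapshot"); a delta against a parent is an ordinary delta.
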
    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

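    # e.g. a root full snapshot has a delta chain consisting of itself alone,
    # so its depth is 0; an intermediate snapshot built on it has depth 1.
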
    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))

    def revision(self, nodeorrev, _df=None):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        """
        return self._revisiondata(nodeorrev, _df)

    def sidedata(self, nodeorrev, _df=None):
        """a map of extra data related to the changeset but not part of the hash

        This function currently returns a dictionary. However, a more advanced
        mapping object will likely be used in the future for more
        efficient/lazy handling.
        """
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
        else:
            rev = self.rev(nodeorrev)
        return self._sidedata(rev)

    def _revisiondata(self, nodeorrev, _df=None, raw=False):
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        # fast path the special `nullid` rev
        if node == self.nullid:
            return b""

        # ``rawtext`` is the text as stored inside the revlog. Might be the
        # revision or might need to be processed to retrieve the revision.
        rev, rawtext, validated = self._rawtext(node, rev, _df=_df)

        if raw and validated:
            # if we don't want to process the raw text and the raw text is
            # cached, we can exit early.
            return rawtext
        if rev is None:
            rev = self.rev(node)
        # the revlog's flags for this revision
        # (they usually alter its state or content)
        flags = self.flags(rev)

        if validated and flags == REVIDX_DEFAULT_FLAGS:
            # no extra flags set, no flag processor runs, text = rawtext
            return rawtext

        if raw:
            validatehash = flagutil.processflagsraw(self, rawtext, flags)
            text = rawtext
        else:
            r = flagutil.processflagsread(self, rawtext, flags)
            text, validatehash = r
        if validatehash:
            self.checkhash(text, node, rev=rev)
        if not validated:
            self._revisioncache = (node, rev, rawtext)

        return text

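    # Note the raw/processed distinction used above: ``rawtext`` is what sits
    # on disk, while flag processors (e.g. for REVIDX_ISCENSORED or
    # REVIDX_ELLIPSIS revisions) may transform it on read. When no extra flag
    # is set, the two are identical and a cached rawtext is returned as-is.
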
    def _rawtext(self, node, rev, _df=None):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)
        """

        # revision in the cache (could be useful to apply delta)
        cachedrev = None
        # An intermediate text to apply deltas to
        basetext = None

        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._revisioncache:
            if self._revisioncache[0] == node:
                return (rev, self._revisioncache[2], True)
            cachedrev = self._revisioncache[1]

        if rev is None:
            rev = self.rev(node)

        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            basetext = self._revisioncache[2]

        # drop cache to save memory, the caller is expected to
        # update self._revisioncache after validating the text
        self._revisioncache = None

        targetsize = None
        rawsize = self.index[rev][2]
        if 0 <= rawsize:
            targetsize = 4 * rawsize

        bins = self._chunks(chain, df=_df, targetsize=targetsize)
        if basetext is None:
            basetext = bytes(bins[0])
            bins = bins[1:]

        rawtext = mdiff.patches(basetext, bins)
        del basetext  # let us have a chance to free memory early
        return (rev, rawtext, False)

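    # Reconstruction sketch: ``chain`` lists the revisions of the delta chain
    # from its base up to ``rev``. The first chunk is (or patches onto) the
    # base text and the rest are binary deltas, so the work boils down to:
    #
    #   rawtext = mdiff.patches(basetext, [delta, delta, ...])
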
    def _sidedata(self, rev):
        """Return the sidedata for a given revision number."""
        index_entry = self.index[rev]
        sidedata_offset = index_entry[8]
        sidedata_size = index_entry[9]

        if self._inline:
            sidedata_offset += self.index.entry_size * (1 + rev)
        if sidedata_size == 0:
            return {}

        if self._docket.sidedata_end < sidedata_offset + sidedata_size:
            filename = self._sidedatafile
            end = self._docket.sidedata_end
            offset = sidedata_offset
            length = sidedata_size
            m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
            raise error.RevlogError(m)

        comp_segment = self._segmentfile_sidedata.read_chunk(
            sidedata_offset, sidedata_size
        )

        comp = self.index[rev][11]
        if comp == COMP_MODE_PLAIN:
            segment = comp_segment
        elif comp == COMP_MODE_DEFAULT:
            segment = self._decompressor(comp_segment)
        elif comp == COMP_MODE_INLINE:
            segment = self.decompress(comp_segment)
        else:
            msg = b'unknown compression mode %d'
            msg %= comp
            raise error.RevlogError(msg)

        sidedata = sidedatautil.deserialize_sidedata(segment)
        return sidedata

    def rawdata(self, nodeorrev, _df=None):
        """return the uncompressed raw data of a given node or revision number.

        _df - an existing file handle to read from. (internal-only)
        """
        return self._revisiondata(nodeorrev, _df, raw=True)

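    # Usage sketch (assuming ``rl`` is a revlog and ``node`` a stored node):
    #
    #   rl.revision(node)  # text after flag processors have run
    #   rl.rawdata(node)   # bytes as stored, before any processing
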
    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        computation as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

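    # The default implementation hashes the two parent nodes in sorted order
    # followed by the text, i.e. roughly:
    #
    #   sha1(min(p1, p2) + max(p1, p2) + text).digest()
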
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if self._revisioncache and self._revisioncache[0] == node:
                    self._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(
                    _(b"integrity check failed on %s:%s")
                    % (self.display_id, pycompat.bytestr(revornode))
                )
        except error.RevlogError:
            if self._censorable and storageutil.iscensoredtext(text):
                raise error.CensoredNodeError(self.display_id, node, text)
            raise

    @property
    def _split_index_file(self):
        """the path at which to expect the index of an ongoing splitting
        operation

        The file will only exist if a splitting operation is in progress, but
        it is always expected at the same location."""
        parts = self.radix.split(b'/')
        if len(parts) > 1:
            # add a '-s' suffix to the ``data/`` or ``meta/`` base directory
            head = parts[0] + b'-s'
            mids = parts[1:-1]
            tail = parts[-1] + b'.i'
            pieces = [head] + mids + [tail]
            return b'/'.join(pieces)
        else:
            # the revlog is stored at the root of the store (changelog or
            # manifest), no risk of collision.
            return self.radix + b'.i.s'

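    # e.g. a filelog with radix b'data/foo.txt' maps to b'data-s/foo.txt.i',
    # while a changelog with radix b'00changelog' maps to b'00changelog.i.s'.
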
    def _enforceinlinesize(self, tr, side_write=True):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        total_size = self.start(tiprev) + self.length(tiprev)
        if not self._inline or total_size < _maxinline:
            return

        troffset = tr.findoffset(self._indexfile)
        if troffset is None:
            raise error.RevlogError(
                _(b"%s not found in the transaction") % self._indexfile
            )
        if troffset:
            tr.addbackup(self._indexfile, for_offset=True)
        tr.add(self._datafile, 0)

        existing_handles = False
        if self._writinghandles is not None:
            existing_handles = True
            fp = self._writinghandles[0]
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None
            self._segmentfile.writing_handle = None
            # No need to deal with the sidedata writing handle as it is only
            # relevant with revlog-v2, which is never inline and thus never
            # reaches this code.
        if side_write:
            old_index_file_path = self._indexfile
            new_index_file_path = self._split_index_file
            opener = self.opener
            weak_self = weakref.ref(self)

            # the "split" index replaces the real index when the transaction
            # is finalized
            def finalize_callback(tr):
                opener.rename(
                    new_index_file_path,
                    old_index_file_path,
                    checkambig=True,
                )
                maybe_self = weak_self()
                if maybe_self is not None:
                    maybe_self._indexfile = old_index_file_path

            def abort_callback(tr):
                maybe_self = weak_self()
                if maybe_self is not None:
                    maybe_self._indexfile = old_index_file_path

            tr.registertmp(new_index_file_path)
            if self.target[1] is not None:
                callback_id = b'000-revlog-split-%d-%s' % self.target
            else:
                callback_id = b'000-revlog-split-%d' % self.target[0]
            tr.addfinalize(callback_id, finalize_callback)
            tr.addabort(callback_id, abort_callback)

        new_dfh = self._datafp(b'w+')
        new_dfh.truncate(0)  # drop any potentially existing data
        try:
            with self._indexfp() as read_ifh:
                for r in self:
                    new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
                new_dfh.flush()

            if side_write:
                self._indexfile = new_index_file_path
                with self.__index_new_fp() as fp:
                    self._format_flags &= ~FLAG_INLINE_DATA
                    self._inline = False
                    for i in self:
                        e = self.index.entry_binary(i)
                        if i == 0 and self._docket is None:
                            header = self._format_flags | self._format_version
                            header = self.index.pack_header(header)
                            e = header + e
                        fp.write(e)
                    if self._docket is not None:
                        self._docket.index_end = fp.tell()

                # If we don't use side-write, the temp file replaces the real
                # index when we exit the context manager

            nodemaputil.setup_persistent_nodemap(tr, self)
            self._segmentfile = randomaccessfile.randomaccessfile(
                self.opener,
                self._datafile,
                self._chunkcachesize,
            )

            if existing_handles:
                # switched from inline to conventional; reopen the index
                ifh = self.__index_write_fp()
                self._writinghandles = (ifh, new_dfh, None)
                self._segmentfile.writing_handle = new_dfh
                new_dfh = None
                # No need to deal with the sidedata writing handle as it is
                # only relevant with revlog-v2, which is never inline and thus
                # never reaches this code.
        finally:
            if new_dfh is not None:
                new_dfh.close()

    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored."""

    @contextlib.contextmanager
    def reading(self):
        """Context manager that keeps data and sidedata files open for reading"""
        with self._segmentfile.reading():
            with self._segmentfile_sidedata.reading():
                yield

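    # Usage sketch: batching several reads under one pair of open handles,
    # assuming ``rl`` is a revlog instance:
    #
    #   with rl.reading():
    #       texts = [rl.revision(r) for r in revs]
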
    @contextlib.contextmanager
    def _writing(self, transaction):
        if self._trypending:
            msg = b'trying to write in a `trypending` revlog: %s'
            msg %= self.display_id
            raise error.ProgrammingError(msg)
        if self._writinghandles is not None:
            yield
        else:
            ifh = dfh = sdfh = None
            try:
                r = len(self)
                # opening the data file.
                dsize = 0
                if r:
                    dsize = self.end(r - 1)
                dfh = None
                if not self._inline:
                    try:
                        dfh = self._datafp(b"r+")
                        if self._docket is None:
                            dfh.seek(0, os.SEEK_END)
                        else:
                            dfh.seek(self._docket.data_end, os.SEEK_SET)
                    except FileNotFoundError:
                        dfh = self._datafp(b"w+")
                    transaction.add(self._datafile, dsize)
                if self._sidedatafile is not None:
                    # revlog-v2 does not inline, help Pytype
                    assert dfh is not None
                    try:
                        sdfh = self.opener(self._sidedatafile, mode=b"r+")
                        sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)
                    except FileNotFoundError:
                        sdfh = self.opener(self._sidedatafile, mode=b"w+")
                        transaction.add(
                            self._sidedatafile, self._docket.sidedata_end
                        )

                # opening the index file.
                isize = r * self.index.entry_size
                ifh = self.__index_write_fp()
                if self._inline:
                    transaction.add(self._indexfile, dsize + isize)
                else:
                    transaction.add(self._indexfile, isize)
                # exposing all file handles for writing.
                self._writinghandles = (ifh, dfh, sdfh)
                self._segmentfile.writing_handle = ifh if self._inline else dfh
                self._segmentfile_sidedata.writing_handle = sdfh
                yield
                if self._docket is not None:
                    self._write_docket(transaction)
            finally:
                self._writinghandles = None
                self._segmentfile.writing_handle = None
                self._segmentfile_sidedata.writing_handle = None
                if dfh is not None:
                    dfh.close()
                if sdfh is not None:
                    sdfh.close()
                # closing the index file last to avoid exposing referent to
                # potential unflushed data content.
                if ifh is not None:
                    ifh.close()

    def _write_docket(self, transaction):
        """write the current docket on disk

        Exists as a method to help the changelog implement its transaction
        logic.

        We could also imagine using the same transaction logic for all revlogs
        since dockets are cheap."""
        self._docket.write(transaction)

    def addrevision(
        self,
        text,
        transaction,
        link,
        p1,
        p2,
        cachedelta=None,
        node=None,
        flags=REVIDX_DEFAULT_FLAGS,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
        computed by default as hash(text, p1, p2), however subclasses might
        use a different hashing method (and override checkhash() in that case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
        multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(
                _(b"attempted to add linkrev -1 to %s") % self.display_id
            )

        if sidedata is None:
            sidedata = {}
        elif sidedata and not self.hassidedata:
            raise error.ProgrammingError(
                _(b"trying to add sidedata to a revlog that does not support them")
            )

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = flagutil.processflagswrite(self, text, flags)

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _(
                    b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
                )
                % (self.display_id, len(rawtext))
            )

        node = node or self.hash(rawtext, p1, p2)
        rev = self.index.get_rev(node)
        if rev is not None:
            return rev

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(
            rawtext,
            transaction,
            link,
            p1,
            p2,
            node,
            flags,
            cachedelta=cachedelta,
            deltacomputer=deltacomputer,
            sidedata=sidedata,
        )

    def addrawrevision(
        self,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        node,
        flags,
        cachedelta=None,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a raw revision with known flags, node and parents

        useful when reusing a revision not stored in this revlog (ex: received
        over the wire, or read from an external bundle).
        """
        with self._writing(transaction):
            return self._addrevision(
                node,
                rawtext,
                transaction,
                link,
                p1,
                p2,
                flags,
                cachedelta,
                deltacomputer=deltacomputer,
                sidedata=sidedata,
            )

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return b'', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return b'', compressed

        if data[0:1] == b'\0':
            return b'', data
        return b'u', data

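    # Return convention (as read from the code above): a ``(header, data)``
    # 2-tuple. An empty header means the payload already embeds its own
    # compression header (e.g. zlib's 'x'); b'u' flags data that is stored
    # uncompressed; data starting with a NUL byte needs no marker at all.
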
    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for the "decompress compressed
        # data when encoded with common and officially supported compression
        # engines" case over "raw data" and "data encoded by less common or
        # non-official compression engines." That is why we have the inline
        # lookup first followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == b'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(
                    _(b'revlog decompress error: %s')
                    % stringutil.forcebytestr(e)
                )
        # '\0' is more common than 'u' so it goes first.
        elif t == b'\0':
            return data
        elif t == b'u':
            return util.buffer(data, 1)

        compressor = self._get_decompressor(t)

        return compressor.decompress(data)

    def _addrevision(
        self,
        node,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        flags,
        cachedelta,
        alwayscache=False,
        deltacomputer=None,
        sidedata=None,
    ):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == self.nullid:
            raise error.RevlogError(
                _(b"%s: attempt to add null revision") % self.display_id
            )
        if (
            node == self.nodeconstants.wdirid
            or node in self.nodeconstants.wdirfilenodeids
        ):
            raise error.RevlogError(
                _(b"%s: attempt to add wdir revision") % self.display_id
            )
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)

        if self._inline:
            fh = self._writinghandles[0]
        else:
            fh = self._writinghandles[1]

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1

        offset = self._get_data_offset(prev)

        if self._concurrencychecker:
            ifh, dfh, sdfh = self._writinghandles
            # XXX no checking for the sidedata file
            if self._inline:
                # offset is "as if" it were in the .d file, so we need to add
                # on the size of the entry metadata.
                self._concurrencychecker(
                    ifh, self._indexfile, offset + curr * self.index.entry_size
                )
            else:
                # Entries in the .i are a consistent size.
                self._concurrencychecker(
                    ifh, self._indexfile, curr * self.index.entry_size
                )
                self._concurrencychecker(dfh, self._datafile, offset)

        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # we need the rawtext size before it was changed by flag
            # processors, which is the non-raw size. use revlog explicitly to
            # avoid filelog's extra logic that might remove metadata size.
            textlen = mdiff.patchedsize(
                revlog.size(self, cachedelta[0]), cachedelta[1]
            )
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            write_debug = None
            if self._debug_delta:
                write_debug = transaction._report
            deltacomputer = deltautil.deltacomputer(
                self, write_debug=write_debug
            )

        if cachedelta is not None and len(cachedelta) == 2:
            # If the cached delta has no information about how it should be
            # reused, add the default reuse instruction according to the
            # revlog's configuration.
            if self._generaldelta and self._lazydeltabase:
                delta_base_reuse = DELTA_BASE_REUSE_TRY
            else:
                delta_base_reuse = DELTA_BASE_REUSE_NO
            cachedelta = (cachedelta[0], cachedelta[1], delta_base_reuse)

        revinfo = revlogutils.revisioninfo(
            node,
            p1,
            p2,
            btext,
            textlen,
            cachedelta,
            flags,
        )

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        compression_mode = COMP_MODE_INLINE
        if self._docket is not None:
            default_comp = self._docket.default_compression_header
            r = deltautil.delta_compression(default_comp, deltainfo)
            compression_mode, deltainfo = r

        sidedata_compression_mode = COMP_MODE_INLINE
        if sidedata and self.hassidedata:
            sidedata_compression_mode = COMP_MODE_PLAIN
            serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
            sidedata_offset = self._docket.sidedata_end
            h, comp_sidedata = self.compress(serialized_sidedata)
            if (
                h != b'u'
                and comp_sidedata[0:1] != b'\0'
                and len(comp_sidedata) < len(serialized_sidedata)
            ):
                assert not h
                if (
                    comp_sidedata[0:1]
                    == self._docket.default_compression_header
                ):
                    sidedata_compression_mode = COMP_MODE_DEFAULT
                    serialized_sidedata = comp_sidedata
                else:
                    sidedata_compression_mode = COMP_MODE_INLINE
                    serialized_sidedata = comp_sidedata
        else:
            serialized_sidedata = b""
            # Don't store the offset if the sidedata is empty, that way
            # we can easily detect empty sidedata and they will be no
            # different than ones we manually add.
            sidedata_offset = 0

        rank = RANK_UNKNOWN
        if self._compute_rank:
            if (p1r, p2r) == (nullrev, nullrev):
                rank = 1
            elif p1r != nullrev and p2r == nullrev:
                rank = 1 + self.fast_rank(p1r)
            elif p1r == nullrev and p2r != nullrev:
                rank = 1 + self.fast_rank(p2r)
            else:  # merge node
                if rustdagop is not None and self.index.rust_ext_compat:
                    rank = rustdagop.rank(self.index, p1r, p2r)
                else:
                    pmin, pmax = sorted((p1r, p2r))
                    rank = 1 + self.fast_rank(pmax)
                    rank += sum(1 for _ in self.findmissingrevs([pmax], [pmin]))

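        # (A note on the rank just computed: it is the size of the revision's
        # ancestor set, the revision itself included: 1 for a root, the
        # parent's rank plus one on a linear chain, and for merges the size
        # of the union of both parents' ancestor sets, counted via
        # findmissingrevs when the Rust helper is unavailable.)
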
        e = revlogutils.entry(
            flags=flags,
            data_offset=offset,
            data_compressed_length=deltainfo.deltalen,
            data_uncompressed_length=textlen,
            data_compression_mode=compression_mode,
            data_delta_base=deltainfo.base,
            link_rev=link,
            parent_rev_1=p1r,
            parent_rev_2=p2r,
            node_id=node,
            sidedata_offset=sidedata_offset,
            sidedata_compressed_length=len(serialized_sidedata),
            sidedata_compression_mode=sidedata_compression_mode,
            rank=rank,
        )

        self.index.append(e)
        entry = self.index.entry_binary(curr)
        if curr == 0 and self._docket is None:
            header = self._format_flags | self._format_version
            header = self.index.pack_header(header)
            entry = header + entry
        self._writeentry(
            transaction,
            entry,
            deltainfo.data,
            link,
            offset,
            serialized_sidedata,
            sidedata_offset,
        )

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes:  # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return curr

    def _get_data_offset(self, prev):
        """Returns the current offset in the (in-transaction) data file.
        Versions < 2 of the revlog can get this in O(1); revlog v2 needs a
        docket file to store that information: since sidedata can be rewritten
        to the end of the data file within a transaction, you can have cases
        where, for example, rev `n` does not have sidedata while rev `n - 1`
        does, leading to `n - 1`'s sidedata being written after `n`'s data.

        TODO cache this in a docket file before getting out of experimental."""
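        # Illustrative note (not in the original code): for pre-v2 revlogs the
        # answer comes straight from the index, since self.end(prev) is just
        # self.start(prev) + self.length(prev); revlog-v2 must instead trust
        # the docket's data_end, which tracks the actual end of the data file.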
        if self._docket is None:
            return self.end(prev)
        else:
            return self._docket.data_end

    def _writeentry(
        self, transaction, entry, data, link, offset, sidedata, sidedata_offset
    ):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)
        ifh, dfh, sdfh = self._writinghandles
        if self._docket is None:
            ifh.seek(0, os.SEEK_END)
        else:
            ifh.seek(self._docket.index_end, os.SEEK_SET)
        if dfh:
            if self._docket is None:
                dfh.seek(0, os.SEEK_END)
            else:
                dfh.seek(self._docket.data_end, os.SEEK_SET)
        if sdfh:
            sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self._datafile, offset)
            if self._sidedatafile:
                transaction.add(self._sidedatafile, sidedata_offset)
            transaction.add(self._indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            if sidedata:
                sdfh.write(sidedata)
            ifh.write(entry)
        else:
            offset += curr * self.index.entry_size
            transaction.add(self._indexfile, offset)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            assert not sidedata
            self._enforceinlinesize(transaction)
        if self._docket is not None:
            # revlog-v2 always has 3 writing handles, help Pytype
            wh1 = self._writinghandles[0]
            wh2 = self._writinghandles[1]
            wh3 = self._writinghandles[2]
            assert wh1 is not None
            assert wh2 is not None
            assert wh3 is not None
            self._docket.index_end = wh1.tell()
            self._docket.data_end = wh2.tell()
            self._docket.sidedata_end = wh3.tell()

        nodemaputil.setup_persistent_nodemap(transaction, self)

    def addgroup(
        self,
        deltas,
        linkmapper,
        transaction,
        alwayscache=False,
        addrevisioncb=None,
        duplicaterevisioncb=None,
        debug_info=None,
        delta_base_reuse_policy=None,
    ):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        if self._adding_group:
            raise error.ProgrammingError(b'cannot nest addgroup() calls')

        # read the default delta-base reuse policy from revlog config if the
        # group did not specify one.
        if delta_base_reuse_policy is None:
            if self._generaldelta and self._lazydeltabase:
                delta_base_reuse_policy = DELTA_BASE_REUSE_TRY
            else:
                delta_base_reuse_policy = DELTA_BASE_REUSE_NO

        self._adding_group = True
        empty = True
        try:
            with self._writing(transaction):
                write_debug = None
                if self._debug_delta:
                    write_debug = transaction._report
                deltacomputer = deltautil.deltacomputer(
                    self,
                    write_debug=write_debug,
                    debug_info=debug_info,
                )
                # loop through our set of deltas
                for data in deltas:
                    (
                        node,
                        p1,
                        p2,
                        linknode,
                        deltabase,
                        delta,
                        flags,
                        sidedata,
                    ) = data
                    link = linkmapper(linknode)
                    flags = flags or REVIDX_DEFAULT_FLAGS

                    rev = self.index.get_rev(node)
                    if rev is not None:
                        # this can happen if two branches make the same change
                        self._nodeduplicatecallback(transaction, rev)
                        if duplicaterevisioncb:
                            duplicaterevisioncb(self, rev)
                        empty = False
                        continue

                    for p in (p1, p2):
                        if not self.index.has_node(p):
                            raise error.LookupError(
                                p, self.radix, _(b'unknown parent')
                            )

                    if not self.index.has_node(deltabase):
                        raise error.LookupError(
                            deltabase, self.display_id, _(b'unknown delta base')
                        )

                    baserev = self.rev(deltabase)

                    if baserev != nullrev and self.iscensored(baserev):
                        # if base is censored, delta must be full replacement in a
                        # single patch operation
                        hlen = struct.calcsize(b">lll")
                        oldlen = self.rawsize(baserev)
                        newlen = len(delta) - hlen
                        if delta[:hlen] != mdiff.replacediffheader(
                            oldlen, newlen
                        ):
                            raise error.CensoredBaseError(
                                self.display_id, self.node(baserev)
                            )

                    if not flags and self._peek_iscensored(baserev, delta):
                        flags |= REVIDX_ISCENSORED

                    # We assume consumers of addrevisioncb will want to retrieve
                    # the added revision, which will require a call to
                    # revision(). revision() will fast path if there is a cache
                    # hit. So, we tell _addrevision() to always cache in this case.
                    # We're only using addgroup() in the context of changegroup
                    # generation so the revision data can always be handled as raw
                    # by the flagprocessor.
                    rev = self._addrevision(
                        node,
                        None,
                        transaction,
                        link,
                        p1,
                        p2,
                        flags,
                        (baserev, delta, delta_base_reuse_policy),
                        alwayscache=alwayscache,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

                    if addrevisioncb:
                        addrevisioncb(self, rev)
                    empty = False
        finally:
            self._adding_group = False
        return not empty

    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

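        # The bitwise AND below yields the raw flag value (an int), which
        # callers simply treat as a boolean.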
        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(
            minlink,
            len(self) - 1,
            self.headrevs(),
            self.linkrev,
            self.parentrevs,
        )

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        data_end = self.start(rev)
        if not self._inline:
            transaction.add(self._datafile, data_end)
            end = rev * self.index.entry_size
        else:
            end = data_end + (rev * self.index.entry_size)
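        # (For inline revlogs the index entries and data chunks are
        # interleaved in a single file, hence the combined truncation offset
        # above.)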

        if self._sidedatafile:
            sidedata_end = self.sidedata_cut_off(rev)
            transaction.add(self._sidedatafile, sidedata_end)

        transaction.add(self._indexfile, end)
        if self._docket is not None:
            # XXX we could leverage the docket while stripping. However it is
            # not powerful enough at the time of this comment
            self._docket.index_end = end
            self._docket.data_end = data_end
            self._docket.sidedata_end = sidedata_end
            self._docket.write(transaction, stripping=True)

        # then reset internal state in memory to forget those revisions
        self._revisioncache = None
        self._chaininfocache = util.lrucachedict(500)
        self._segmentfile.clear_cache()
        self._segmentfile_sidedata.clear_cache()

        del self.index[rev:-1]

    def checksize(self):
        """Check size of index and data files

        return a (dd, di) tuple.
        - dd: extra bytes for the "data" file
        - di: extra bytes for the "index" file

        A healthy revlog will return (0, 0).
        """
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, io.SEEK_END)
                actual = f.tell()
            dd = actual - expected
        except FileNotFoundError:
            dd = 0

        try:
            f = self.opener(self._indexfile)
            f.seek(0, io.SEEK_END)
            actual = f.tell()
            f.close()
            s = self.index.entry_size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except FileNotFoundError:
            di = 0

        return (dd, di)

    def files(self):
        res = [self._indexfile]
        if self._docket_file is None:
            if not self._inline:
                res.append(self._datafile)
        else:
            res.append(self._docket_file)
            res.extend(self._docket.old_index_filepaths(include_empty=False))
            if self._docket.data_end:
                res.append(self._datafile)
            res.extend(self._docket.old_data_filepaths(include_empty=False))
            if self._docket.sidedata_end:
                res.append(self._sidedatafile)
            res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
        return res

    def emitrevisions(
        self,
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltamode=repository.CG_DELTAMODE_STD,
        sidedata_helpers=None,
        debug_info=None,
    ):
        if nodesorder not in (b'nodes', b'storage', b'linear', None):
            raise error.ProgrammingError(
                b'unhandled value for nodesorder: %s' % nodesorder
            )

        if nodesorder is None and not self._generaldelta:
            nodesorder = b'storage'
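        # (In a non-generaldelta revlog each delta is stored against the
        # previous revision in the file, so emitting in storage order gives
        # the best chance of reusing those deltas as-is.)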

        if (
            not self._storedeltachains
            and deltamode != repository.CG_DELTAMODE_PREV
        ):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self,
            nodes,
            nodesorder,
            revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self.candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
            sidedata_helpers=sidedata_helpers,
            debug_info=debug_info,
        )

    DELTAREUSEALWAYS = b'always'
    DELTAREUSESAMEREVS = b'samerevs'
    DELTAREUSENEVER = b'never'

    DELTAREUSEFULLADD = b'fulladd'

    DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}

    def clone(
        self,
        tr,
        destrevlog,
        addrevisioncb=None,
        deltareuse=DELTAREUSESAMEREVS,
        forcedeltabothparents=None,
        sidedata_helpers=None,
    ):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is the
          fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).
        DELTAREUSEFULLADD
          Revisions will be re-added as if they were new content. This is
          slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
          e.g. large file detection and handling.

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``forcedeltabothparents``
        argument controls whether to force compute deltas against both parents
        for merges. When unset, the destination revlog's current setting is
        kept.

        See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
        `sidedata_helpers`.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(
                _(b'value for deltareuse invalid: %s') % deltareuse
            )

        if len(destrevlog):
            raise ValueError(_(b'destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_(b'source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_(b'destination revlog has filtered revisions'))

        # lazydelta and lazydeltabase control whether to reuse a cached delta,
        # if possible.
        oldlazydelta = destrevlog._lazydelta
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSENEVER:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = False

            destrevlog._deltabothparents = forcedeltabothparents or oldamd

            self._clone(
                tr,
                destrevlog,
                addrevisioncb,
                deltareuse,
                forcedeltabothparents,
                sidedata_helpers,
            )

        finally:
            destrevlog._lazydelta = oldlazydelta
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd

    def _clone(
        self,
        tr,
        destrevlog,
        addrevisioncb,
        deltareuse,
        forcedeltabothparents,
        sidedata_helpers,
    ):
        """perform the core duty of `revlog.clone` after parameter processing"""
        write_debug = None
        if self._debug_delta:
            write_debug = tr._report
        deltacomputer = deltautil.deltacomputer(
            destrevlog,
            write_debug=write_debug,
        )
        index = self.index
        for rev in self:
            entry = index[rev]

            # Some classes override linkrev to take filtered revs into
            # account. Use raw entry from index.
            flags = entry[0] & 0xFFFF
            linkrev = entry[4]
            p1 = index[entry[5]][7]
            p2 = index[entry[6]][7]
            node = entry[7]
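            # Index entry layout relied on above (illustrative): entry[0]
            # packs the data offset and flags, entry[4] is the linkrev,
            # entry[5]/entry[6] are the parent revs, and entry[7] is the node
            # id.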

            # (Possibly) reuse the delta from the revlog if allowed and
            # the revlog chunk is a delta.
            cachedelta = None
            rawtext = None
            if deltareuse == self.DELTAREUSEFULLADD:
                text = self._revisiondata(rev)
                sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                destrevlog.addrevision(
                    text,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    cachedelta=cachedelta,
                    node=node,
                    flags=flags,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )
            else:
                if destrevlog._lazydelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                sidedata = None
                if not cachedelta:
                    rawtext = self._revisiondata(rev)
                    sidedata = self.sidedata(rev)
                if sidedata is None:
                    sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                with destrevlog._writing(tr):
                    destrevlog._addrevision(
                        node,
                        rawtext,
                        tr,
                        linkrev,
                        p1,
                        p2,
                        flags,
                        cachedelta,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

            if addrevisioncb:
                addrevisioncb(self, rev, node)

    def censorrevision(self, tr, censornode, tombstone=b''):
        if self._format_version == REVLOGV0:
            raise error.RevlogError(
                _(b'cannot censor with version %d revlogs')
                % self._format_version
            )
        elif self._format_version == REVLOGV1:
            rewrite.v1_censor(self, tr, censornode, tombstone)
        else:
            rewrite.v2_censor(self, tr, censornode, tombstone)

    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_(b'index contains %d extra bytes') % di)

        version = self._format_version

        # The verifier tells us what version revlog we should be.
        if version != state[b'expectedversion']:
            yield revlogproblem(
                warning=_(b"warning: '%s' uses revlog format %d; expected %d")
                % (self.display_id, version, state[b'expectedversion'])
            )

        state[b'skipread'] = set()
        state[b'safe_renamed'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta: file content starts with b'\1\n', the metadata
            #         header defined in filelog.py, but without a rename
            #   ext: content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  -------------------------------------------------------
            #  flags()              | 0      | 0      | 0     | not 0
            #  renamed()            | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n' | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "rawdata(rev)". "text"
            # mentioned below is "revision(rev)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see flagutil.addflagprocessor).
            #
            #              | common  | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1      | L1     | L1    | L1
            # size()       | L1      | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2      | L2     | L2    | L2
            # len(text)    | L2      | L2     | L2    | L3
            # len(read())  | L2      | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            #  1. length check: L1 == L2, in all cases.
            #  2. hash check: depending on flag processor, we may need to
            #     use either "text" (external), or "rawtext" (in revlog).

            try:
                skipflags = state.get(b'skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                _verify_revision(self, skipflags, state, node)

                l1 = self.rawsize(rev)
                l2 = len(self.rawdata(node))

                if l1 != l2:
                    yield revlogproblem(
                        error=_(b'unpacked size is %d, %d expected') % (l2, l1),
                        node=node,
                    )

            except error.CensoredNodeError:
                if state[b'erroroncensored']:
                    yield revlogproblem(
                        error=_(b'censored file data'), node=node
                    )
                state[b'skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_(b'unpacking %s: %s')
                    % (short(node), stringutil.forcebytestr(e)),
                    node=node,
                )
                state[b'skipread'].add(node)

3417 self,
3418 self,
3418 exclusivefiles=False,
3419 exclusivefiles=False,
3419 sharedfiles=False,
3420 sharedfiles=False,
3420 revisionscount=False,
3421 revisionscount=False,
3421 trackedsize=False,
3422 trackedsize=False,
3422 storedsize=False,
3423 storedsize=False,
3423 ):
3424 ):
3424 d = {}
3425 d = {}
3425
3426
3426 if exclusivefiles:
3427 if exclusivefiles:
3427 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
3428 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
3428 if not self._inline:
3429 if not self._inline:
3429 d[b'exclusivefiles'].append((self.opener, self._datafile))
3430 d[b'exclusivefiles'].append((self.opener, self._datafile))
3430
3431
3431 if sharedfiles:
3432 if sharedfiles:
3432 d[b'sharedfiles'] = []
3433 d[b'sharedfiles'] = []
3433
3434
3434 if revisionscount:
3435 if revisionscount:
3435 d[b'revisionscount'] = len(self)
3436 d[b'revisionscount'] = len(self)
3436
3437
3437 if trackedsize:
3438 if trackedsize:
3438 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3439 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3439
3440
3440 if storedsize:
3441 if storedsize:
3441 d[b'storedsize'] = sum(
3442 d[b'storedsize'] = sum(
3442 self.opener.stat(path).st_size for path in self.files()
3443 self.opener.stat(path).st_size for path in self.files()
3443 )
3444 )
3444
3445
3445 return d
3446 return d
3446
3447
3447 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
3448 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
3448 if not self.hassidedata:
3449 if not self.hassidedata:
3449 return
3450 return
3450 # revlog formats with sidedata support does not support inline
3451 # revlog formats with sidedata support does not support inline
3451 assert not self._inline
3452 assert not self._inline
3452 if not helpers[1] and not helpers[2]:
3453 if not helpers[1] and not helpers[2]:
3453 # Nothing to generate or remove
3454 # Nothing to generate or remove
3454 return
3455 return
3455
3456
3456 new_entries = []
3457 new_entries = []
3457 # append the new sidedata
3458 # append the new sidedata
3458 with self._writing(transaction):
3459 with self._writing(transaction):
3459 ifh, dfh, sdfh = self._writinghandles
3460 ifh, dfh, sdfh = self._writinghandles
3460 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
3461 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
3461
3462
3462 current_offset = sdfh.tell()
3463 current_offset = sdfh.tell()
3463 for rev in range(startrev, endrev + 1):
3464 for rev in range(startrev, endrev + 1):
3464 entry = self.index[rev]
3465 entry = self.index[rev]
3465 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3466 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3466 store=self,
3467 store=self,
3467 sidedata_helpers=helpers,
3468 sidedata_helpers=helpers,
3468 sidedata={},
3469 sidedata={},
3469 rev=rev,
3470 rev=rev,
3470 )
3471 )
3471
3472
3472 serialized_sidedata = sidedatautil.serialize_sidedata(
3473 serialized_sidedata = sidedatautil.serialize_sidedata(
3473 new_sidedata
3474 new_sidedata
3474 )
3475 )
3475
3476
3476 sidedata_compression_mode = COMP_MODE_INLINE
3477 sidedata_compression_mode = COMP_MODE_INLINE
3477 if serialized_sidedata and self.hassidedata:
3478 if serialized_sidedata and self.hassidedata:
3478 sidedata_compression_mode = COMP_MODE_PLAIN
3479 sidedata_compression_mode = COMP_MODE_PLAIN
3479 h, comp_sidedata = self.compress(serialized_sidedata)
3480 h, comp_sidedata = self.compress(serialized_sidedata)
3480 if (
3481 if (
3481 h != b'u'
3482 h != b'u'
3482 and comp_sidedata[0] != b'\0'
3483 and comp_sidedata[0] != b'\0'
3483 and len(comp_sidedata) < len(serialized_sidedata)
3484 and len(comp_sidedata) < len(serialized_sidedata)
3484 ):
3485 ):
3485 assert not h
3486 assert not h
3486 if (
3487 if (
3487 comp_sidedata[0]
3488 comp_sidedata[0]
3488 == self._docket.default_compression_header
3489 == self._docket.default_compression_header
3489 ):
3490 ):
3490 sidedata_compression_mode = COMP_MODE_DEFAULT
3491 sidedata_compression_mode = COMP_MODE_DEFAULT
3491 serialized_sidedata = comp_sidedata
3492 serialized_sidedata = comp_sidedata
3492 else:
3493 else:
3493 sidedata_compression_mode = COMP_MODE_INLINE
3494 sidedata_compression_mode = COMP_MODE_INLINE
3494 serialized_sidedata = comp_sidedata
3495 serialized_sidedata = comp_sidedata
3495 if entry[8] != 0 or entry[9] != 0:
3496 if entry[8] != 0 or entry[9] != 0:
3496 # rewriting entries that already have sidedata is not
3497 # rewriting entries that already have sidedata is not
3497 # supported yet, because it introduces garbage data in the
3498 # supported yet, because it introduces garbage data in the
3498 # revlog.
3499 # revlog.
3499 msg = b"rewriting existing sidedata is not supported yet"
3500 msg = b"rewriting existing sidedata is not supported yet"
3500 raise error.Abort(msg)
3501 raise error.Abort(msg)
3501
3502
3502 # Apply (potential) flags to add and to remove after running
3503 # Apply (potential) flags to add and to remove after running
3503 # the sidedata helpers
3504 # the sidedata helpers
3504 new_offset_flags = entry[0] | flags[0] & ~flags[1]
3505 new_offset_flags = entry[0] | flags[0] & ~flags[1]
3505 entry_update = (
3506 entry_update = (
3506 current_offset,
3507 current_offset,
3507 len(serialized_sidedata),
3508 len(serialized_sidedata),
3508 new_offset_flags,
3509 new_offset_flags,
3509 sidedata_compression_mode,
3510 sidedata_compression_mode,
3510 )
3511 )
3511
3512
3512 # the sidedata computation might have move the file cursors around
3513 # the sidedata computation might have move the file cursors around
3513 sdfh.seek(current_offset, os.SEEK_SET)
3514 sdfh.seek(current_offset, os.SEEK_SET)
3514 sdfh.write(serialized_sidedata)
3515 sdfh.write(serialized_sidedata)
3515 new_entries.append(entry_update)
3516 new_entries.append(entry_update)
3516 current_offset += len(serialized_sidedata)
3517 current_offset += len(serialized_sidedata)
3517 self._docket.sidedata_end = sdfh.tell()
3518 self._docket.sidedata_end = sdfh.tell()
3518
3519
3519 # rewrite the new index entries
3520 # rewrite the new index entries
3520 ifh.seek(startrev * self.index.entry_size)
3521 ifh.seek(startrev * self.index.entry_size)
3521 for i, e in enumerate(new_entries):
3522 for i, e in enumerate(new_entries):
3522 rev = startrev + i
3523 rev = startrev + i
3523 self.index.replace_sidedata_info(rev, *e)
3524 self.index.replace_sidedata_info(rev, *e)
3524 packed = self.index.entry_binary(rev)
3525 packed = self.index.entry_binary(rev)
3525 if rev == 0 and self._docket is None:
3526 if rev == 0 and self._docket is None:
3526 header = self._format_flags | self._format_version
3527 header = self._format_flags | self._format_version
3527 header = self.index.pack_header(header)
3528 header = self.index.pack_header(header)
3528 packed = header + packed
3529 packed = header + packed
3529 ifh.write(packed)
3530 ifh.write(packed)