revlog: document the `clearcaches` method...
marmoute
r51899:6c3798b4 default
@@ -1,3550 +1,3551 @@
# revlog.py - storage back-end for mercurial
# coding: utf8
#
# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""


import binascii
import collections
import contextlib
import io
import os
import struct
import weakref
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullrev,
    sha1nodeconstants,
    short,
    wdirrev,
)
from .i18n import _
from .revlogutils.constants import (
    ALL_KINDS,
    CHANGELOGV2,
    COMP_MODE_DEFAULT,
    COMP_MODE_INLINE,
    COMP_MODE_PLAIN,
    DELTA_BASE_REUSE_NO,
    DELTA_BASE_REUSE_TRY,
    ENTRY_RANK,
    FEATURES_BY_VERSION,
    FLAG_GENERALDELTA,
    FLAG_INLINE_DATA,
    INDEX_HEADER,
    KIND_CHANGELOG,
    KIND_FILELOG,
    RANK_UNKNOWN,
    REVLOGV0,
    REVLOGV1,
    REVLOGV1_FLAGS,
    REVLOGV2,
    REVLOGV2_FLAGS,
    REVLOG_DEFAULT_FLAGS,
    REVLOG_DEFAULT_FORMAT,
    REVLOG_DEFAULT_VERSION,
    SUPPORTED_FLAGS,
)
from .revlogutils.flagutil import (
    REVIDX_DEFAULT_FLAGS,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_FLAGS_ORDER,
    REVIDX_HASCOPIESINFO,
    REVIDX_ISCENSORED,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
)
from .thirdparty import attr
from . import (
    ancestor,
    dagop,
    error,
    mdiff,
    policy,
    pycompat,
    revlogutils,
    templatefilters,
    util,
)
from .interfaces import (
    repository,
    util as interfaceutil,
)
from .revlogutils import (
    deltas as deltautil,
    docket as docketutil,
    flagutil,
    nodemap as nodemaputil,
    randomaccessfile,
    revlogv0,
    rewrite,
    sidedata as sidedatautil,
)
from .utils import (
    storageutil,
    stringutil,
)

# blanked usage of all the names to prevent pyflakes constraints
# We need these names available in the module for extensions.

REVLOGV0
REVLOGV1
REVLOGV2
CHANGELOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_HASCOPIESINFO
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_RAWTEXT_CHANGING_FLAGS

parsers = policy.importmod('parsers')
rustancestor = policy.importrust('ancestor')
rustdagop = policy.importrust('dagop')
rustrevlog = policy.importrust('revlog')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of inline data embedded into a revlog
_maxinline = 131072


# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    return text, False


def ellipsiswriteprocessor(rl, text):
    return text, False


def ellipsisrawprocessor(rl, text):
    return False


ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)
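
# Illustrative sketch: a flag processor is a (read, write, raw) triple like
# `ellipsisprocessor` above. A hypothetical extension owning a REVIDX_MYFLAG
# bit could register one roughly like so; `REVIDX_MYFLAG` and the transform
# helpers are assumptions, not part of this module:
#
#     def myreadprocessor(rl, text):
#         return decode_on_read(text), False  # False: skip hash validation
#
#     def mywriteprocessor(rl, text):
#         return encode_on_write(text), False
#
#     def myrawprocessor(rl, text):
#         return False  # rawtext is not safe to verify against the hash
#
#     flagutil.addflagprocessor(
#         REVIDX_MYFLAG, (myreadprocessor, mywriteprocessor, myrawprocessor)
#     )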


def _verify_revision(rl, skipflags, state, node):
    """Verify the integrity of the given revlog ``node`` while providing a hook
    point for extensions to influence the operation."""
    if skipflags:
        state[b'skipread'].add(node)
    else:
        # Side-effect: read content and verify hash.
        rl.revision(node)


# True if a fast implementation for persistent-nodemap is available
#
# We also consider that we have a "fast" implementation in "pure" python,
# because people using pure don't really have performance considerations
# (and a wheelbarrow of other slowness sources)
HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or hasattr(
    parsers, 'BaseIndexObject'
)


@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta:
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    sidedata = attr.ib()
    protocol_flags = attr.ib()
    linknode = attr.ib(default=None)


@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem:
    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)


def parse_index_v1(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline)
    return index, cache


def parse_index_v2(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline, format=REVLOGV2)
    return index, cache


def parse_index_cl_v2(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline, format=CHANGELOGV2)
    return index, cache


if hasattr(parsers, 'parse_index_devel_nodemap'):

    def parse_index_v1_nodemap(data, inline):
        index, cache = parsers.parse_index_devel_nodemap(data, inline)
        return index, cache


else:
    parse_index_v1_nodemap = None


def parse_index_v1_mixed(data, inline):
    index, cache = parse_index_v1(data, inline)
    return rustrevlog.MixedIndex(index), cache


# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7FFFFFFF

FILE_TOO_SHORT_MSG = _(
    b'cannot read from revlog %s;'
    b' expected %d bytes from offset %d, data size is %d'
)

hexdigits = b'0123456789abcdefABCDEF'


class revlog:
245 """
245 """
246 the underlying revision storage object
246 the underlying revision storage object
247
247
248 A revlog consists of two parts, an index and the revision data.
248 A revlog consists of two parts, an index and the revision data.
249
249
250 The index is a file with a fixed record size containing
250 The index is a file with a fixed record size containing
251 information on each revision, including its nodeid (hash), the
251 information on each revision, including its nodeid (hash), the
252 nodeids of its parents, the position and offset of its data within
252 nodeids of its parents, the position and offset of its data within
253 the data file, and the revision it's based on. Finally, each entry
253 the data file, and the revision it's based on. Finally, each entry
254 contains a linkrev entry that can serve as a pointer to external
254 contains a linkrev entry that can serve as a pointer to external
255 data.
255 data.
256
256
257 The revision data itself is a linear collection of data chunks.
257 The revision data itself is a linear collection of data chunks.
258 Each chunk represents a revision and is usually represented as a
258 Each chunk represents a revision and is usually represented as a
259 delta against the previous chunk. To bound lookup time, runs of
259 delta against the previous chunk. To bound lookup time, runs of
260 deltas are limited to about 2 times the length of the original
260 deltas are limited to about 2 times the length of the original
261 version data. This makes retrieval of a version proportional to
261 version data. This makes retrieval of a version proportional to
262 its size, or O(1) relative to the number of revisions.
262 its size, or O(1) relative to the number of revisions.
263
263
264 Both pieces of the revlog are written to in an append-only
264 Both pieces of the revlog are written to in an append-only
265 fashion, which means we never need to rewrite a file to insert or
265 fashion, which means we never need to rewrite a file to insert or
266 remove data, and can use some simple techniques to avoid the need
266 remove data, and can use some simple techniques to avoid the need
267 for locking while reading.
267 for locking while reading.
268
268
269 If checkambig, indexfile is opened with checkambig=True at
269 If checkambig, indexfile is opened with checkambig=True at
270 writing, to avoid file stat ambiguity.
270 writing, to avoid file stat ambiguity.
271
271
272 If mmaplargeindex is True, and an mmapindexthreshold is set, the
272 If mmaplargeindex is True, and an mmapindexthreshold is set, the
273 index will be mmapped rather than read if it is larger than the
273 index will be mmapped rather than read if it is larger than the
274 configured threshold.
274 configured threshold.
275
275
276 If censorable is True, the revlog can have censored revisions.
276 If censorable is True, the revlog can have censored revisions.
277
277
278 If `upperboundcomp` is not None, this is the expected maximal gain from
278 If `upperboundcomp` is not None, this is the expected maximal gain from
279 compression for the data content.
279 compression for the data content.
280
280
281 `concurrencychecker` is an optional function that receives 3 arguments: a
281 `concurrencychecker` is an optional function that receives 3 arguments: a
282 file handle, a filename, and an expected position. It should check whether
282 file handle, a filename, and an expected position. It should check whether
283 the current position in the file handle is valid, and log/warn/fail (by
283 the current position in the file handle is valid, and log/warn/fail (by
284 raising).
284 raising).
285
285
286 See mercurial/revlogutils/contants.py for details about the content of an
286 See mercurial/revlogutils/contants.py for details about the content of an
287 index entry.
287 index entry.
288 """
288 """

    _flagserrorclass = error.RevlogError

    @staticmethod
    def is_inline_index(header_bytes):
        """Determine if a revlog is inline from the initial bytes of the index"""
        header = INDEX_HEADER.unpack(header_bytes)[0]

        _format_flags = header & ~0xFFFF
        _format_version = header & 0xFFFF

        features = FEATURES_BY_VERSION[_format_version]
        return features[b'inline'](_format_flags)
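
    # Worked example: the low 16 bits of the unpacked header hold the format
    # version and the high bits hold the flags, so for a typical inline
    # revlogv1 header:
    #
    #     header = REVLOGV1 | FLAG_INLINE_DATA | FLAG_GENERALDELTA
    #     header & 0xFFFF    # -> REVLOGV1
    #     header & ~0xFFFF   # -> FLAG_INLINE_DATA | FLAG_GENERALDELTA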

    def __init__(
        self,
        opener,
        target,
        radix,
        postfix=None,  # only exists for `tmpcensored` now
        checkambig=False,
        mmaplargeindex=False,
        censorable=False,
        upperboundcomp=None,
        persistentnodemap=False,
        concurrencychecker=None,
        trypending=False,
        try_split=False,
        canonical_parent_order=True,
    ):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.

        `target`: a (KIND, ID) tuple that identifies the content stored in
        this revlog. It helps the rest of the code to understand what the
        revlog is about without having to resort to heuristics and index
        filename analysis. Note that this must reliably be set by normal
        code, but test, debug, or performance measurement code might not
        set this to an accurate value.
        """
        self.upperboundcomp = upperboundcomp

        self.radix = radix

        self._docket_file = None
        self._indexfile = None
        self._datafile = None
        self._sidedatafile = None
        self._nodemap_file = None
        self.postfix = postfix
        self._trypending = trypending
        self._try_split = try_split
        self.opener = opener
        if persistentnodemap:
            self._nodemap_file = nodemaputil.get_nodemap_file(self)

        assert target[0] in ALL_KINDS
        assert len(target) == 2
        self.target = target
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        self._mmaplargeindex = mmaplargeindex
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._revisioncache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, b'')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._deltabothparents = True
        self._candidate_group_chunk_size = 0
        self._debug_delta = False
        self.index = None
        self._docket = None
        self._nodemap_docket = None
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._compengine = b'zlib'
        self._compengineopts = {}
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._sparserevlog = False
        self.hassidedata = False
        self._srdensitythreshold = 0.50
        self._srmingapsize = 262144

        # other optional features

        # might remove rank configuration once the computation has no impact
        self._compute_rank = False

        # Make copy of flag processors so each revlog instance can support
        # custom flags.
        self._flagprocessors = dict(flagutil.flagprocessors)

        # 3-tuple of file handles being used for active writing.
        self._writinghandles = None
        # prevent nesting of addgroup
        self._adding_group = None

        self._loadindex()

        self._concurrencychecker = concurrencychecker

        # parent order is supposed to be semantically irrelevant, so we
        # normally resort parents to ensure that the first parent is non-null,
        # if there is a non-null parent at all.
        # filelog abuses the parent order as a flag to mark some instances of
        # meta-encoded files, so allow it to disable this behavior.
        self.canonical_parent_order = canonical_parent_order
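
    # Illustrative example: the `target` tuple documented above could be,
    # say, (KIND_FILELOG, b'path/to/file') for a filelog; `display_id` below
    # uses that second element to show the tracked path in messages. The
    # exact ID used for other kinds is not shown here, and the path is a
    # hypothetical placeholder:
    #
    #     target = (KIND_FILELOG, b'path/to/file')
    #     assert target[0] in ALL_KINDS and len(target) == 2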

    def _init_opts(self):
        """process options (from above/config) to set up the associated
        default revlog mode

        These values might be affected when actually reading on-disk
        information.

        The relevant values are returned for use in _loadindex().

        * newversionflags:
            version header to use if we need to create a new revlog

        * mmapindexthreshold:
            minimal index size at which to start using mmap

        * force_nodemap:
            force the usage of a "development" version of the nodemap code
        """
        mmapindexthreshold = None
        opts = self.opener.options

        if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
            new_header = CHANGELOGV2
            self._compute_rank = opts.get(b'changelogv2.compute-rank', True)
        elif b'revlogv2' in opts:
            new_header = REVLOGV2
        elif b'revlogv1' in opts:
            new_header = REVLOGV1 | FLAG_INLINE_DATA
            if b'generaldelta' in opts:
                new_header |= FLAG_GENERALDELTA
        elif b'revlogv0' in self.opener.options:
            new_header = REVLOGV0
        else:
            new_header = REVLOG_DEFAULT_VERSION

        if b'chunkcachesize' in opts:
            self._chunkcachesize = opts[b'chunkcachesize']
        if b'maxchainlen' in opts:
            self._maxchainlen = opts[b'maxchainlen']
        if b'deltabothparents' in opts:
            self._deltabothparents = opts[b'deltabothparents']
        dps_cgds = opts.get(b'delta-parent-search.candidate-group-chunk-size')
        if dps_cgds:
            self._candidate_group_chunk_size = dps_cgds
        self._lazydelta = bool(opts.get(b'lazydelta', True))
        self._lazydeltabase = False
        if self._lazydelta:
            self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
        if b'debug-delta' in opts:
            self._debug_delta = opts[b'debug-delta']
        if b'compengine' in opts:
            self._compengine = opts[b'compengine']
        if b'zlib.level' in opts:
            self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
        if b'zstd.level' in opts:
            self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
        if b'maxdeltachainspan' in opts:
            self._maxdeltachainspan = opts[b'maxdeltachainspan']
        if self._mmaplargeindex and b'mmapindexthreshold' in opts:
            mmapindexthreshold = opts[b'mmapindexthreshold']
        self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
        withsparseread = bool(opts.get(b'with-sparse-read', False))
        # sparse-revlog forces sparse-read
        self._withsparseread = self._sparserevlog or withsparseread
        if b'sparse-read-density-threshold' in opts:
            self._srdensitythreshold = opts[b'sparse-read-density-threshold']
        if b'sparse-read-min-gap-size' in opts:
            self._srmingapsize = opts[b'sparse-read-min-gap-size']
        if opts.get(b'enableellipsis'):
            self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

        # revlog v0 doesn't have flag processors
        for flag, processor in opts.get(b'flagprocessors', {}).items():
            flagutil.insertflagprocessor(flag, processor, self._flagprocessors)

        if self._chunkcachesize <= 0:
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not greater than 0')
                % self._chunkcachesize
            )
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not a power of 2')
                % self._chunkcachesize
            )
        force_nodemap = opts.get(b'devel-force-nodemap', False)
        return new_header, mmapindexthreshold, force_nodemap
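
    # Worked example: with opener options such as
    # {b'revlogv1': True, b'generaldelta': True}, the selection above yields
    #
    #     new_header = REVLOGV1 | FLAG_INLINE_DATA | FLAG_GENERALDELTA
    #
    # i.e. a new revlog starts out inline and uses generaldelta; it is split
    # into separate index/data files later, once it outgrows _maxinline.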

    def _get_data(self, filepath, mmap_threshold, size=None):
        """return a file content with or without mmap

        If the file is missing return the empty string"""
        try:
            with self.opener(filepath) as fp:
                if mmap_threshold is not None:
                    file_size = self.opener.fstat(fp).st_size
                    if file_size >= mmap_threshold:
                        if size is not None:
                            # avoid potential mmap crash
                            size = min(file_size, size)
                        # TODO: should .close() to release resources without
                        # relying on Python GC
                        if size is None:
                            return util.buffer(util.mmapread(fp))
                        else:
                            return util.buffer(util.mmapread(fp, size))
                if size is None:
                    return fp.read()
                else:
                    return fp.read(size)
        except FileNotFoundError:
            return b''
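
    # Usage note: _loadindex below reads the index through this helper, e.g.
    #
    #     entry_data = self._get_data(entry_point, mmapindexthreshold)
    #
    # so that, when mmaplargeindex is enabled and the file meets the
    # threshold, the index comes back as a buffer over an mmap instead of a
    # fully materialized bytes object.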

    def get_streams(self, max_linkrev, force_inline=False):
        """return a list of streams that represent this revlog

        This is used by stream-clone to do byte-to-byte copies of a repository.

        This streams data for all revisions that refer to a changelog revision up
        to `max_linkrev`.

        If `force_inline` is set, it enforces that the stream will represent an inline revlog.

        It returns a list of three-tuples:

        [
            (filename, bytes_stream, stream_size),
            …
        ]
        """
        n = len(self)
        index = self.index
        while n > 0:
            linkrev = index[n - 1][4]
            if linkrev < max_linkrev:
                break
            # note: this loop will rarely go through multiple iterations, since
            # it only traverses commits created during the current streaming
            # pull operation.
            #
            # If this becomes a problem, using a binary search should cap the
            # runtime of this.
            n = n - 1
        if n == 0:
            # no data to send
            return []
        index_size = n * index.entry_size
        data_size = self.end(n - 1)

        # XXX we might have been split (or stripped) since the object was
        # initialized. We need to close this race too, e.g. by having a way to
        # pre-open the files we feed to the revlog and never closing them
        # before we are done streaming.

        if self._inline:

            def get_stream():
                with self._indexfp() as fp:
                    yield None
                    size = index_size + data_size
                    if size <= 65536:
                        yield fp.read(size)
                    else:
                        yield from util.filechunkiter(fp, limit=size)

            inline_stream = get_stream()
            next(inline_stream)
            return [
                (self._indexfile, inline_stream, index_size + data_size),
            ]
        elif force_inline:

            def get_stream():
                with self._datafp() as fp_d:
                    yield None

                    for rev in range(n):
                        idx = self.index.entry_binary(rev)
                        if rev == 0 and self._docket is None:
                            # re-inject the inline flag
                            header = self._format_flags
                            header |= self._format_version
                            header |= FLAG_INLINE_DATA
                            header = self.index.pack_header(header)
                            idx = header + idx
                        yield idx
                        yield self._getsegmentforrevs(rev, rev, df=fp_d)[1]

            inline_stream = get_stream()
            next(inline_stream)
            return [
                (self._indexfile, inline_stream, index_size + data_size),
            ]
        else:

            def get_index_stream():
                with self._indexfp() as fp:
                    yield None
                    if index_size <= 65536:
                        yield fp.read(index_size)
                    else:
                        yield from util.filechunkiter(fp, limit=index_size)

            def get_data_stream():
                with self._datafp() as fp:
                    yield None
                    if data_size <= 65536:
                        yield fp.read(data_size)
                    else:
                        yield from util.filechunkiter(fp, limit=data_size)

            index_stream = get_index_stream()
            next(index_stream)
            data_stream = get_data_stream()
            next(data_stream)
            return [
                (self._datafile, data_stream, data_size),
                (self._indexfile, index_stream, index_size),
            ]
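
    # Consumption sketch (`send` is a hypothetical transport helper): each
    # returned stream has already had its leading `None` consumed, so
    # iterating it yields exactly `size` bytes of file content:
    #
    #     for name, stream, size in rl.get_streams(max_linkrev):
    #         for chunk in stream:
    #             send(name, chunk)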

    def _loadindex(self, docket=None):

        new_header, mmapindexthreshold, force_nodemap = self._init_opts()

        if self.postfix is not None:
            entry_point = b'%s.i.%s' % (self.radix, self.postfix)
        elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
            entry_point = b'%s.i.a' % self.radix
        elif self._try_split and self.opener.exists(self._split_index_file):
            entry_point = self._split_index_file
        else:
            entry_point = b'%s.i' % self.radix

        if docket is not None:
            self._docket = docket
            self._docket_file = entry_point
        else:
            self._initempty = True
            entry_data = self._get_data(entry_point, mmapindexthreshold)
            if len(entry_data) > 0:
                header = INDEX_HEADER.unpack(entry_data[:4])[0]
                self._initempty = False
            else:
                header = new_header

            self._format_flags = header & ~0xFFFF
            self._format_version = header & 0xFFFF

            supported_flags = SUPPORTED_FLAGS.get(self._format_version)
            if supported_flags is None:
                msg = _(b'unknown version (%d) in revlog %s')
                msg %= (self._format_version, self.display_id)
                raise error.RevlogError(msg)
            elif self._format_flags & ~supported_flags:
                msg = _(b'unknown flags (%#04x) in version %d revlog %s')
                display_flag = self._format_flags >> 16
                msg %= (display_flag, self._format_version, self.display_id)
                raise error.RevlogError(msg)

            features = FEATURES_BY_VERSION[self._format_version]
            self._inline = features[b'inline'](self._format_flags)
            self._generaldelta = features[b'generaldelta'](self._format_flags)
            self.hassidedata = features[b'sidedata']

            if not features[b'docket']:
                self._indexfile = entry_point
                index_data = entry_data
            else:
                self._docket_file = entry_point
                if self._initempty:
                    self._docket = docketutil.default_docket(self, header)
                else:
                    self._docket = docketutil.parse_docket(
                        self, entry_data, use_pending=self._trypending
                    )

        if self._docket is not None:
            self._indexfile = self._docket.index_filepath()
            index_data = b''
            index_size = self._docket.index_end
            if index_size > 0:
                index_data = self._get_data(
                    self._indexfile, mmapindexthreshold, size=index_size
                )
                if len(index_data) < index_size:
                    msg = _(b'too few index data for %s: got %d, expected %d')
                    msg %= (self.display_id, len(index_data), index_size)
                    raise error.RevlogError(msg)

            self._inline = False
            # generaldelta implied by version 2 revlogs.
            self._generaldelta = True
            # the logic for persistent nodemap will be dealt with within the
            # main docket, so disable it for now.
            self._nodemap_file = None

        if self._docket is not None:
            self._datafile = self._docket.data_filepath()
            self._sidedatafile = self._docket.sidedata_filepath()
        elif self.postfix is None:
            self._datafile = b'%s.d' % self.radix
        else:
            self._datafile = b'%s.d.%s' % (self.radix, self.postfix)

        self.nodeconstants = sha1nodeconstants
        self.nullid = self.nodeconstants.nullid

        # sparse-revlog can't be on without general-delta (issue6056)
        if not self._generaldelta:
            self._sparserevlog = False

        self._storedeltachains = True

        devel_nodemap = (
            self._nodemap_file
            and force_nodemap
            and parse_index_v1_nodemap is not None
        )

        use_rust_index = False
        if rustrevlog is not None:
            if self._nodemap_file is not None:
                use_rust_index = True
            else:
                use_rust_index = self.opener.options.get(b'rust.index')

        self._parse_index = parse_index_v1
        if self._format_version == REVLOGV0:
            self._parse_index = revlogv0.parse_index_v0
        elif self._format_version == REVLOGV2:
            self._parse_index = parse_index_v2
        elif self._format_version == CHANGELOGV2:
            self._parse_index = parse_index_cl_v2
        elif devel_nodemap:
            self._parse_index = parse_index_v1_nodemap
        elif use_rust_index:
            self._parse_index = parse_index_v1_mixed
        try:
            d = self._parse_index(index_data, self._inline)
            index, chunkcache = d
            use_nodemap = (
                not self._inline
                and self._nodemap_file is not None
                and hasattr(index, 'update_nodemap_data')
            )
            if use_nodemap:
                nodemap_data = nodemaputil.persisted_data(self)
                if nodemap_data is not None:
                    docket = nodemap_data[0]
                    if (
                        len(d[0]) > docket.tip_rev
                        and d[0][docket.tip_rev][7] == docket.tip_node
                    ):
                        # no changelog tampering
                        self._nodemap_docket = docket
                        index.update_nodemap_data(*nodemap_data)
        except (ValueError, IndexError):
            raise error.RevlogError(
                _(b"index %s is corrupted") % self.display_id
            )
        self.index = index
        self._segmentfile = randomaccessfile.randomaccessfile(
            self.opener,
            (self._indexfile if self._inline else self._datafile),
            self._chunkcachesize,
            chunkcache,
        )
        self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
            self.opener,
            self._sidedatafile,
            self._chunkcachesize,
        )
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = util.lrucachedict(500)
        # revlog header -> revlog compressor
        self._decompressors = {}

    def get_revlog(self):
        """simple function to mirror the API of other not-really-revlog storage"""
        return self

    @util.propertycache
    def revlog_kind(self):
        return self.target[0]

    @util.propertycache
    def display_id(self):
        """The public-facing "ID" of the revlog that we use in messages"""
        if self.revlog_kind == KIND_FILELOG:
            # Reference the file without the "data/" prefix, so it is familiar
            # to the user.
            return self.target[1]
        else:
            return self.radix

    def _get_decompressor(self, t):
        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor(self._compengineopts)
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(
                    _(b'unknown compression type %s') % binascii.hexlify(t)
                )
        return compressor
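
    # Background note: `t` is the compression header byte(s) that prefix a
    # stored chunk. zlib-compressed chunks, for instance, start with b'x'
    # (the usual zlib magic), so
    #
    #     util.compengines.forrevlogheader(b'x')
    #
    # resolves to the zlib engine; uncompressed chunks are typically
    # special-cased by callers before this lookup is needed.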

    @util.propertycache
    def _compressor(self):
        engine = util.compengines[self._compengine]
        return engine.revlogcompressor(self._compengineopts)

    @util.propertycache
    def _decompressor(self):
        """the default decompressor"""
        if self._docket is None:
            return None
        t = self._docket.default_compression_header
        c = self._get_decompressor(t)
        return c.decompress

    def _indexfp(self):
        """file object for the revlog's index file"""
        return self.opener(self._indexfile, mode=b"r")

    def __index_write_fp(self):
        # You should not use this directly; use `_writing` instead
        try:
            f = self.opener(
                self._indexfile, mode=b"r+", checkambig=self._checkambig
            )
            if self._docket is None:
                f.seek(0, os.SEEK_END)
            else:
                f.seek(self._docket.index_end, os.SEEK_SET)
            return f
        except FileNotFoundError:
            return self.opener(
                self._indexfile, mode=b"w+", checkambig=self._checkambig
            )

    def __index_new_fp(self):
        # You should not use this unless you are upgrading from an inline revlog
        return self.opener(
            self._indexfile,
            mode=b"w",
            checkambig=self._checkambig,
            atomictemp=True,
        )

    def _datafp(self, mode=b'r'):
        """file object for the revlog's data file"""
        return self.opener(self._datafile, mode=mode)

    @contextlib.contextmanager
    def _sidedatareadfp(self):
        """file object suitable to read sidedata"""
        if self._writinghandles:
            yield self._writinghandles[2]
        else:
            with self.opener(self._sidedatafile) as fp:
                yield fp

    def tiprev(self):
        return len(self.index) - 1

    def tip(self):
        return self.node(self.tiprev())
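
    # Example: for an empty revlog, len(self.index) is 0, so tiprev() is -1
    # (nullrev) and tip() resolves to the null node:
    #
    #     assert rl.tiprev() == nullrev
    #     assert rl.tip() == rl.nullid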

    def __contains__(self, rev):
        return 0 <= rev < len(self)

    def __len__(self):
        return len(self.index)

    def __iter__(self):
        return iter(range(len(self)))

    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        return storageutil.iterrevs(len(self), start=start, stop=stop)

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def _candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such flag processors can alter
        # the rawtext content that the delta will be based on, and two clients
        # could have the same revlog node with different flags (i.e. different
        # rawtext contents) and the delta could be incompatible.
        if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
            self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
        ):
            return False
        return True

    def update_caches(self, transaction):
        """update the on-disk cache

        If a transaction is passed, the update may be delayed to transaction
        commit."""
        if self._nodemap_file is not None:
            if transaction is None:
                nodemaputil.update_persistent_nodemap(self)
            else:
                nodemaputil.setup_persistent_nodemap(transaction, self)

    def clearcaches(self):
        """Clear in-memory caches"""
        self._revisioncache = None
        self._chainbasecache.clear()
        self._segmentfile.clear_cache()
        self._segmentfile_sidedata.clear_cache()
        self._pcache = {}
        self._nodemap_docket = None
        self.index.clearcaches()
        # The python code is the one responsible for validating the docket, so
        # we end up having to refresh it here.
        use_nodemap = (
            not self._inline
            and self._nodemap_file is not None
            and hasattr(self.index, 'update_nodemap_data')
        )
        if use_nodemap:
            nodemap_data = nodemaputil.persisted_data(self)
            if nodemap_data is not None:
                self._nodemap_docket = nodemap_data[0]
                self.index.update_nodemap_data(*nodemap_data)
941
942
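    # Illustrative usage sketch (not part of this changeset): clearcaches()
    # is the kind of hook perf tooling calls between timing runs so each
    # read starts cold. Assuming a revlog instance `rlog`, a hypothetical
    # helper could look like:
    #
    #     def time_cold_read(rlog, rev):
    #         rlog.clearcaches()         # drop revision/chain/nodemap caches
    #         return rlog.revision(rev)  # rebuilds the delta chain from disk
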
    def rev(self, node):
        try:
            return self.index.rev(node)
        except TypeError:
            raise
        except error.RevlogError:
            # parsers.c radix tree lookup failed
            if (
                node == self.nodeconstants.wdirid
                or node in self.nodeconstants.wdirfilenodeids
            ):
                raise error.WdirUnsupported
            raise error.LookupError(node, self.display_id, _(b'no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

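    # Worked example of the packing above (illustrative values only): a
    # revision stored at byte offset 0x1234 with flag bit 0x0001 set is
    # recorded as a single integer, and the two accessors split it back:
    #
    #     packed = (0x1234 << 16) | 0x0001
    #     assert packed >> 16 == 0x1234     # what start(rev) returns
    #     assert packed & 0xFFFF == 0x0001  # what flags(rev) returns
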
    def sidedata_cut_off(self, rev):
        sd_cut_off = self.index[rev][8]
        if sd_cut_off != 0:
            return sd_cut_off
        # This is some annoying dance, because entries without sidedata
        # currently use 0 as their offset (instead of previous-offset +
        # previous-size).
        #
        # We should reconsider this sidedata → 0 sidedata_offset policy.
        # In the meantime, we need this.
        while 0 <= rev:
            e = self.index[rev]
            if e[9] != 0:
                return e[8] + e[9]
            rev -= 1
        return 0

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]

    def sidedata_length(self, rev):
        if not self.hassidedata:
            return 0
        return self.index[rev][9]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.rawdata(rev)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev))

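    # Sketch of the fast-path mask above with made-up flag values: if the
    # known-flag universe were 0b0111 and ELLIPSIS were 0b0010, the XOR
    # would clear the ELLIPSIS bit from the mask, so a revision carrying
    # only ELLIPSIS still passes the `flags & mask == 0` test and takes the
    # cheap rawsize() path:
    #
    #     KNOWN, ELLIPSIS = 0b0111, 0b0010
    #     mask = KNOWN ^ ELLIPSIS       # == 0b0101
    #     assert (ELLIPSIS & mask) == 0
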
    def fast_rank(self, rev):
        """Return the rank of a revision if already known, or None otherwise.

        The rank of a revision is the size of the sub-graph it defines as a
        head. Equivalently, the rank of a revision `r` is the size of the set
        `ancestors(r)`, `r` included.

        This method returns the rank retrieved from the revlog in constant
        time. It makes no attempt at computing unknown values for versions of
        the revlog which do not persist the rank.
        """
        rank = self.index[rev][ENTRY_RANK]
        if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
            return None
        if rev == nullrev:
            return 0  # convention
        return rank

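    # Worked example for ranks (illustrative): in a linear history
    # 0 <- 1 <- 2, ancestors(2) == {0, 1, 2}, so rank(2) == 3. For a merge
    # m of p1 and p2, rank(m) == len(ancestors(p1) | ancestors(p2)) + 1,
    # which is why a rank cannot be derived from the parents' ranks alone
    # and has to be persisted.
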
    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

        if self.canonical_parent_order and entry[5] == nullrev:
            return entry[6], entry[5]
        else:
            return entry[5], entry[6]

    # fast parentrevs(rev) where rev isn't filtered
    _uncheckedparentrevs = parentrevs

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        # inline node() to avoid function call overhead
        if self.canonical_parent_order and d[5] == self.nullid:
            return i[d[6]][7], i[d[5]][7]
        else:
            return i[d[5]][7], i[d[6]][7]

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped

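    # Illustrative walk (made-up index data): with general delta, if rev 5
    # has delta base 3 and rev 3 is a full snapshot (its base field points
    # at itself... here, rev 0's text is the snapshot base), then:
    #
    #     _deltachain(5)            -> ([0, 3, 5], False)
    #     _deltachain(5, stoprev=3) -> ([5], True)
    #
    # i.e. the caller applies the deltas for the listed revs in ascending
    # order, or starts from stoprev's text when `stopped` is True.
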
    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse revision order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        # first, make sure start revisions aren't filtered
        revs = list(revs)
        checkrev = self.node
        for r in revs:
            checkrev(r)
        # and we're sure ancestors aren't filtered as well

        if rustancestor is not None and self.index.rust_ext_compat:
            lazyancestors = rustancestor.LazyAncestors
            arg = self.index
        else:
            lazyancestors = ancestor.lazyancestors
            arg = self._uncheckedparentrevs
        return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)

    def descendants(self, revs):
        return dagop.descendantrevs(revs, self.revs, self.parentrevs)

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

        ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset:
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

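    # Worked example (illustrative graph): with revisions 0 <- 1 <- 2 and a
    # side branch 0 <- 3, findcommonmissing(common=[node(1)]) builds
    # `has` = ::1 = {null, 0, 1} lazily, then walks from the heads {2, 3}:
    # 2 is missing (its parent 1 is in `has`), 3 is missing (its parent 0
    # is in `has`), giving (::1, [node(2), node(3)]) in revision order.
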
    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        if rustancestor is not None and self.index.rust_ext_compat:
            return rustancestor.MissingAncestors(self.index, common)
        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

        1. N is an ancestor of some node in 'heads'
        2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullrev."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

        1. N is an ancestor of some node in 'heads'
        2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

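    # Usage sketch for the incremental API (illustrative; `rlog`, `tip_a`
    # and `tip_b` are hypothetical, method names per mercurial.ancestor):
    # discovery code reuses one object and feeds it newly-learned common
    # revisions rather than recomputing the missing set from scratch.
    #
    #     inc = rlog.incrementalmissingrevs(common=[0])
    #     first = inc.missingancestors([tip_a])  # missing wrt common so far
    #     inc.addbases([5])                      # more revs found common
    #     rest = inc.missingancestors([tip_b])   # incremental follow-up
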
    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

        1. N is a descendant of some node in 'roots'
        2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses the list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [self.nullid]  # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return (
                [self.node(r) for r in self],
                [self.nullid],
                list(self.heads()),
            )
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == self.nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n)  # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update(
                            [p for p in self.parents(n) if p != self.nullid]
                        )
                    elif n in heads:  # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [self.nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the descendants set with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in heads.items() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

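    # Worked example (illustrative): in the graph 0 <- 1 <- 2 with a branch
    # 1 <- 3, nodesbetween(roots=[node(1)], heads=[node(2)]) returns
    # ([node(1), node(2)], [node(1)], [node(2)]): rev 3 is a descendant of
    # the root but not an ancestor of the requested head, so it is excluded.
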
    def headrevs(self, revs=None):
        if revs is None:
            try:
                return self.index.headrevs()
            except AttributeError:
                return self._headrevs()
        if rustdagop is not None and self.index.rust_ext_compat:
            return rustdagop.headrevs(self.index, revs)
        return dagop.headrevs(revs, self._uncheckedparentrevs)

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iter over filtered revs so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]

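    # Worked pass of the marking above (illustrative): for 0 <- 1 <- 2 plus
    # a branch 0 <- 3, each rev first marks itself, then clears its parents:
    #
    #     r=0: ishead = [1, 0, 0, 0, ...]
    #     r=1: mark 1, clear 0  -> [0, 1, 0, 0, ...]
    #     r=2: mark 2, clear 1  -> [0, 0, 1, 0, ...]
    #     r=3: mark 3, clear 0  -> [0, 0, 1, 1, ...]
    #
    # leaving exactly the childless revisions 2 and 3 set.
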
    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [self.nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = {self.rev(n) for n in stop or []}

        revs = dagop.headrevssubset(
            self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
        )

        return [self.node(rev) for rev in revs]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError):  # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        reachableroots is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return bool(self.reachableroots(a, [b], [a], includepath=False))

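    # The early returns above are cheap invariants: nullrev precedes
    # everything; every rev is its own ancestor; and because revision
    # numbers are assigned in topological order, an ancestor can never have
    # a larger number than its descendant, so `a > b` settles the question
    # without touching the graph. Only the remaining case pays for a
    # reachability walk from `a` toward `b`.
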
    def reachableroots(self, minroot, heads, roots, includepath=False):
        """return (heads(::(<roots> and <roots>::<heads>)))

        If includepath is True, return (<roots>::<heads>)."""
        try:
            return self.index.reachableroots2(
                minroot, heads, roots, includepath
            )
        except AttributeError:
            return dagop._reachablerootspure(
                self.parentrevs, minroot, roots, heads, includepath
            )

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return self.nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == self.nodeconstants.nodelen:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node)  # quick search the index
                return node
            except error.LookupError:
                pass  # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if b"%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 2 * self.nodeconstants.nodelen:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (binascii.Error, error.LookupError):
                pass

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be full
        # hashes
        maybewdir = self.nodeconstants.wdirhex.startswith(id)
        ambiguous = False
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    ambiguous = True
                else:
                    return partial
            elif maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            else:
                return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                ambiguous = True
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key is not hex
            pass
        if ambiguous:
            raise error.AmbiguousPrefixLookupError(
                id, self.display_id, _(b'ambiguous identifier')
            )

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            # hex(node)[:...]
            l = len(id) // 2 * 2  # grab an even number of digits
            try:
                # we're dropping the last digit, so let's check that it's hex,
                # to avoid the expensive computation below if it's not
                if len(id) % 2 > 0:
                    if not (id[-1] in hexdigits):
                        return None
                prefix = bin(id[:l])
            except binascii.Error:
                pass
            else:
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [
                    n for n in nl if hex(n).startswith(id) and self.hasnode(n)
                ]
                if self.nodeconstants.nullhex.startswith(id):
                    nl.append(self.nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.display_id, _(b'ambiguous identifier')
                    )
                if maybewdir:
                    raise error.WdirUnsupported
                return None

    def lookup(self, id):
        """locate a node based on:
        - revision number or str(revision number)
        - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.display_id, _(b'no match found'))

    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""

        def isvalid(prefix):
            try:
                matchednode = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if matchednode is None:
                raise error.LookupError(node, self.display_id, _(b'no node'))
            return True

        def maybewdir(prefix):
            return all(c == b'f' for c in pycompat.iterbytestr(prefix))

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != self.nodeconstants.wdirid:
                    raise error.LookupError(
                        node, self.display_id, _(b'no node')
                    )
            except AttributeError:
                # Fall through to pure code
                pass

        if node == self.nodeconstants.wdirid:
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, len(hexnode) + 1):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

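    # Usage sketch (hypothetical values): this is the machinery behind
    # prefix shortening such as the `shortest` template filter. A node
    # whose hex form starts with b'1f0e...' might yield b'1f0' if three
    # digits already distinguish it from every other node and from the
    # all-'f' wdir pseudo-identifier.
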
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self.index.entry_size
            end += (endrev + 1) * self.index.entry_size
        length = end - start

        return start, self._segmentfile.read_chunk(start, length, df)

    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        compression_mode = self.index[rev][10]
        data = self._getsegmentforrevs(rev, rev, df=df)[1]
        if compression_mode == COMP_MODE_PLAIN:
            return data
        elif compression_mode == COMP_MODE_DEFAULT:
            return self._decompressor(data)
        elif compression_mode == COMP_MODE_INLINE:
            return self.decompress(data)
        else:
            msg = b'unknown compression mode %d'
            msg %= compression_mode
            raise error.RevlogError(msg)

    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self.index.entry_size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(
                self, revs, targetsize=targetsize
            )

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            # self._decompressor might be None, but will not be used in that case
            def_decomp = self._decompressor
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                comp_mode = self.index[rev][10]
                c = buffer(data, chunkstart - offset, chunklength)
                if comp_mode == COMP_MODE_PLAIN:
                    ladd(c)
                elif comp_mode == COMP_MODE_INLINE:
                    ladd(decomp(c))
                elif comp_mode == COMP_MODE_DEFAULT:
                    ladd(def_decomp(c))
                else:
                    msg = b'unknown compression mode %d'
                    msg %= comp_mode
                    raise error.RevlogError(msg)

        return l

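    # Behavioral sketch for the method above (``rl`` and ``revs`` are
    # assumed names): up to buffer-vs-bytes wrapping, the batched read is
    # expected to be equivalent to the naive per-revision loop, just with
    # far fewer reads:
    #
    #   assert [bytes(c) for c in rl._chunks(revs)] == [
    #       bytes(rl._chunk(r)) for r in revs
    #   ]
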
    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

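    # A minimal sketch of walking a delta chain by hand with the method
    # above (``rl`` and ``rev`` are assumed names); ``_deltachain(rev)``
    # computes roughly the same sequence, base first:
    #
    #   chain = []
    #   while rev != nullrev:
    #       chain.append(rev)
    #       rev = rl.deltaparent(rev)
    #   chain.reverse()
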
    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        if not self._sparserevlog:
            return self.deltaparent(rev) == nullrev
        elif hasattr(self.index, 'issnapshot'):
            # directly assign the method to cache the testing and access
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        while self.length(p1) == 0:
            b = self.deltaparent(p1)
            if b == p1:
                break
            p1 = b
        p2 = entry[6]
        while self.length(p2) == 0:
            b = self.deltaparent(p2)
            if b == p2:
                break
            p2 = b
        if base == p1 or base == p2:
            return False
        return self.issnapshot(base)

    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

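    # A small worked example for the two methods above: in a sparse revlog,
    # a delta chain may look like ``full snapshot -> intermediate snapshot
    # -> delta -> delta``. ``issnapshot()`` is True for the two snapshots
    # only, and ``snapshotdepth()`` is 0 for the full snapshot and 1 for
    # the intermediate one (one snapshot sits before it in the chain).
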
    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))

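    # Round-trip sketch for ``revdiff`` (``rl``, ``rev1`` and ``rev2`` are
    # assumed names): the binary delta patches one raw text into the other,
    # using the same ``mdiff`` helpers as ``_rawtext()``:
    #
    #   delta = rl.revdiff(rev1, rev2)
    #   assert mdiff.patches(rl.rawdata(rev1), [delta]) == rl.rawdata(rev2)
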
    def revision(self, nodeorrev, _df=None):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        """
        return self._revisiondata(nodeorrev, _df)

    def sidedata(self, nodeorrev, _df=None):
        """a map of extra data related to the changeset but not part of the hash

        This function currently returns a dictionary. However, a more advanced
        mapping object will likely be used in the future for more
        efficient/lazy code.
        """
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
        else:
            rev = self.rev(nodeorrev)
        return self._sidedata(rev)

    def _revisiondata(self, nodeorrev, _df=None, raw=False):
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        # fast path the special `nullid` rev
        if node == self.nullid:
            return b""

        # ``rawtext`` is the text as stored inside the revlog. Might be the
        # revision or might need to be processed to retrieve the revision.
        rev, rawtext, validated = self._rawtext(node, rev, _df=_df)

        if raw and validated:
            # if we don't want to process the raw text and that raw
            # text is cached, we can exit early.
            return rawtext
        if rev is None:
            rev = self.rev(node)
        # the revlog's flags for this revision
        # (they usually alter its state or content)
        flags = self.flags(rev)

        if validated and flags == REVIDX_DEFAULT_FLAGS:
            # no extra flags set, no flag processor runs, text = rawtext
            return rawtext

        if raw:
            validatehash = flagutil.processflagsraw(self, rawtext, flags)
            text = rawtext
        else:
            r = flagutil.processflagsread(self, rawtext, flags)
            text, validatehash = r
        if validatehash:
            self.checkhash(text, node, rev=rev)
        if not validated:
            self._revisioncache = (node, rev, rawtext)

        return text

    def _rawtext(self, node, rev, _df=None):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)
        """

        # revision in the cache (could be useful to apply delta)
        cachedrev = None
        # An intermediate text to apply deltas to
        basetext = None

        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._revisioncache:
            if self._revisioncache[0] == node:
                return (rev, self._revisioncache[2], True)
            cachedrev = self._revisioncache[1]

        if rev is None:
            rev = self.rev(node)

        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            basetext = self._revisioncache[2]

        # drop cache to save memory, the caller is expected to
        # update self._revisioncache after validating the text
        self._revisioncache = None

        targetsize = None
        rawsize = self.index[rev][2]
        if 0 <= rawsize:
            targetsize = 4 * rawsize

        bins = self._chunks(chain, df=_df, targetsize=targetsize)
        if basetext is None:
            basetext = bytes(bins[0])
            bins = bins[1:]

        rawtext = mdiff.patches(basetext, bins)
        del basetext  # let us have a chance to free memory early
        return (rev, rawtext, False)

    def _sidedata(self, rev):
        """Return the sidedata for a given revision number."""
        index_entry = self.index[rev]
        sidedata_offset = index_entry[8]
        sidedata_size = index_entry[9]

        if self._inline:
            sidedata_offset += self.index.entry_size * (1 + rev)
        if sidedata_size == 0:
            return {}

        if self._docket.sidedata_end < sidedata_offset + sidedata_size:
            filename = self._sidedatafile
            end = self._docket.sidedata_end
            offset = sidedata_offset
            length = sidedata_size
            m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
            raise error.RevlogError(m)

        comp_segment = self._segmentfile_sidedata.read_chunk(
            sidedata_offset, sidedata_size
        )

        comp = self.index[rev][11]
        if comp == COMP_MODE_PLAIN:
            segment = comp_segment
        elif comp == COMP_MODE_DEFAULT:
            segment = self._decompressor(comp_segment)
        elif comp == COMP_MODE_INLINE:
            segment = self.decompress(comp_segment)
        else:
            msg = b'unknown compression mode %d'
            msg %= comp
            raise error.RevlogError(msg)

        sidedata = sidedatautil.deserialize_sidedata(segment)
        return sidedata

    def rawdata(self, nodeorrev, _df=None):
        """return the uncompressed raw data of a given node or revision number.

        _df - an existing file handle to read from. (internal-only)
        """
        return self._revisiondata(nodeorrev, _df, raw=True)

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

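    # The default implementation hashes the revision text behind its two
    # parents, smaller node first. A standalone sketch of that convention
    # using only hashlib (equivalent in spirit to
    # ``storageutil.hashrevisionsha1``; ``example_node`` is a hypothetical
    # name):
    #
    #   import hashlib
    #
    #   def example_node(text, p1, p2):
    #       s = hashlib.sha1(min(p1, p2))
    #       s.update(max(p1, p2))
    #       s.update(text)
    #       return s.digest()
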
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if self._revisioncache and self._revisioncache[0] == node:
                    self._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(
                    _(b"integrity check failed on %s:%s")
                    % (self.display_id, pycompat.bytestr(revornode))
                )
        except error.RevlogError:
            if self._censorable and storageutil.iscensoredtext(text):
                raise error.CensoredNodeError(self.display_id, node, text)
            raise

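    # Usage sketch (``rl``, ``text``, ``node`` and ``rev`` are assumed
    # names): callers that can tolerate censored revisions catch the
    # dedicated error instead of the generic integrity failure:
    #
    #   try:
    #       rl.checkhash(text, node, rev=rev)
    #   except error.CensoredNodeError:
    #       pass  # censored content, hash mismatch is expected
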
    @property
    def _split_index_file(self):
        """the path where to expect the index of an ongoing splitting operation

        The file will only exist if a splitting operation is in progress, but
        it is always expected at the same location."""
        parts = self.radix.split(b'/')
        if len(parts) > 1:
            # adds a '-s' suffix to the ``data/`` or ``meta/`` base
            head = parts[0] + b'-s'
            mids = parts[1:-1]
            tail = parts[-1] + b'.i'
            pieces = [head] + mids + [tail]
            return b'/'.join(pieces)
        else:
            # the revlog is stored at the root of the store (changelog or
            # manifest), no risk of collision.
            return self.radix + b'.i.s'

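    # Concrete examples of the mapping implemented above:
    #
    #   radix b'data/foo/bar' -> b'data-s/foo/bar.i'
    #   radix b'meta/dir'     -> b'meta-s/dir.i'
    #   radix b'00changelog'  -> b'00changelog.i.s'
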
    def _enforceinlinesize(self, tr, side_write=True):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        total_size = self.start(tiprev) + self.length(tiprev)
        if not self._inline or total_size < _maxinline:
            return

        troffset = tr.findoffset(self._indexfile)
        if troffset is None:
            raise error.RevlogError(
                _(b"%s not found in the transaction") % self._indexfile
            )
        if troffset:
            tr.addbackup(self._indexfile, for_offset=True)
        tr.add(self._datafile, 0)

        existing_handles = False
        if self._writinghandles is not None:
            existing_handles = True
            fp = self._writinghandles[0]
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None
            self._segmentfile.writing_handle = None
            # No need to deal with sidedata writing handle as it is only
            # relevant with revlog-v2 which is never inline, not reaching
            # this code
        if side_write:
            old_index_file_path = self._indexfile
            new_index_file_path = self._split_index_file
            opener = self.opener
            weak_self = weakref.ref(self)

            # the "split" index replaces the real index when the transaction
            # is finalized
            def finalize_callback(tr):
                opener.rename(
                    new_index_file_path,
                    old_index_file_path,
                    checkambig=True,
                )
                maybe_self = weak_self()
                if maybe_self is not None:
                    maybe_self._indexfile = old_index_file_path

            def abort_callback(tr):
                maybe_self = weak_self()
                if maybe_self is not None:
                    maybe_self._indexfile = old_index_file_path

            tr.registertmp(new_index_file_path)
            if self.target[1] is not None:
                callback_id = b'000-revlog-split-%d-%s' % self.target
            else:
                callback_id = b'000-revlog-split-%d' % self.target[0]
            tr.addfinalize(callback_id, finalize_callback)
            tr.addabort(callback_id, abort_callback)

        new_dfh = self._datafp(b'w+')
        new_dfh.truncate(0)  # drop any potentially existing data
        try:
            with self._indexfp() as read_ifh:
                for r in self:
                    new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
                new_dfh.flush()

            if side_write:
                self._indexfile = new_index_file_path
            with self.__index_new_fp() as fp:
                self._format_flags &= ~FLAG_INLINE_DATA
                self._inline = False
                for i in self:
                    e = self.index.entry_binary(i)
                    if i == 0 and self._docket is None:
                        header = self._format_flags | self._format_version
                        header = self.index.pack_header(header)
                        e = header + e
                    fp.write(e)
                if self._docket is not None:
                    self._docket.index_end = fp.tell()

                # If we don't use side-write, the temp file replaces the real
                # index when we exit the context manager

            nodemaputil.setup_persistent_nodemap(tr, self)
            self._segmentfile = randomaccessfile.randomaccessfile(
                self.opener,
                self._datafile,
                self._chunkcachesize,
            )

            if existing_handles:
                # switched from inline to conventional; reopen the index
                ifh = self.__index_write_fp()
                self._writinghandles = (ifh, new_dfh, None)
                self._segmentfile.writing_handle = new_dfh
                new_dfh = None
                # No need to deal with sidedata writing handle as it is only
                # relevant with revlog-v2 which is never inline, not reaching
                # this code
        finally:
            if new_dfh is not None:
                new_dfh.close()

    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored."""

    @contextlib.contextmanager
    def reading(self):
        """Context manager that keeps data and sidedata files open for reading"""
        with self._segmentfile.reading():
            with self._segmentfile_sidedata.reading():
                yield

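    # Usage sketch (``rl`` is an assumed name for an open revlog): batching
    # several reads under one ``reading()`` context reuses the same file
    # handles instead of reopening the files for every access:
    #
    #   with rl.reading():
    #       texts = [rl.revision(r) for r in rl]
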
    @contextlib.contextmanager
    def _writing(self, transaction):
        if self._trypending:
            msg = b'try to write in a `trypending` revlog: %s'
            msg %= self.display_id
            raise error.ProgrammingError(msg)
        if self._writinghandles is not None:
            yield
        else:
            ifh = dfh = sdfh = None
            try:
                r = len(self)
                # opening the data file.
                dsize = 0
                if r:
                    dsize = self.end(r - 1)
                dfh = None
                if not self._inline:
                    try:
                        dfh = self._datafp(b"r+")
                        if self._docket is None:
                            dfh.seek(0, os.SEEK_END)
                        else:
                            dfh.seek(self._docket.data_end, os.SEEK_SET)
                    except FileNotFoundError:
                        dfh = self._datafp(b"w+")
                    transaction.add(self._datafile, dsize)
                if self._sidedatafile is not None:
                    # revlog-v2 does not inline, help Pytype
                    assert dfh is not None
                    try:
                        sdfh = self.opener(self._sidedatafile, mode=b"r+")
                        dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
                    except FileNotFoundError:
                        sdfh = self.opener(self._sidedatafile, mode=b"w+")
                    transaction.add(
                        self._sidedatafile, self._docket.sidedata_end
                    )

                # opening the index file.
                isize = r * self.index.entry_size
                ifh = self.__index_write_fp()
                if self._inline:
                    transaction.add(self._indexfile, dsize + isize)
                else:
                    transaction.add(self._indexfile, isize)
                # exposing all file handles for writing.
                self._writinghandles = (ifh, dfh, sdfh)
                self._segmentfile.writing_handle = ifh if self._inline else dfh
                self._segmentfile_sidedata.writing_handle = sdfh
                yield
                if self._docket is not None:
                    self._write_docket(transaction)
            finally:
                self._writinghandles = None
                self._segmentfile.writing_handle = None
                self._segmentfile_sidedata.writing_handle = None
                if dfh is not None:
                    dfh.close()
                if sdfh is not None:
                    sdfh.close()
                # closing the index file last to avoid exposing references to
                # potentially unflushed data content.
                if ifh is not None:
                    ifh.close()

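    # Usage sketch (``rl``, ``tr`` and ``revisions`` are assumed names for
    # an open revlog, an active transaction and an iterable of revision
    # data): the context is reentrant, so nested writers simply reuse the
    # handles already opened by the outermost ``_writing`` block:
    #
    #   with rl._writing(tr):
    #       for text, p1, p2, linkrev in revisions:
    #           rl.addrevision(text, tr, linkrev, p1, p2)
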
    def _write_docket(self, transaction):
        """write the current docket on disk

        Exists as a method to help the changelog implement transaction logic.

        We could also imagine using the same transaction logic for all revlogs
        since dockets are cheap."""
        self._docket.write(transaction)

    def addrevision(
        self,
        text,
        transaction,
        link,
        p1,
        p2,
        cachedelta=None,
        node=None,
        flags=REVIDX_DEFAULT_FLAGS,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses
            might use a different hashing method (and override checkhash()
            in such case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
            multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(
                _(b"attempted to add linkrev -1 to %s") % self.display_id
            )

        if sidedata is None:
            sidedata = {}
        elif sidedata and not self.hassidedata:
            raise error.ProgrammingError(
                _(b"trying to add sidedata to a revlog that does not support them")
            )

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = flagutil.processflagswrite(self, text, flags)

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _(
                    b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
                )
                % (self.display_id, len(rawtext))
            )

        node = node or self.hash(rawtext, p1, p2)
        rev = self.index.get_rev(node)
        if rev is not None:
            return rev

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(
            rawtext,
            transaction,
            link,
            p1,
            p2,
            node,
            flags,
            cachedelta=cachedelta,
            deltacomputer=deltacomputer,
            sidedata=sidedata,
        )

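    # Behavioral sketch (assumed names as in the previous example): adding
    # the same (text, p1, p2) twice is idempotent; thanks to the
    # ``index.get_rev(node)`` early return above, the second call hands
    # back the existing revision number instead of appending a duplicate:
    #
    #   r1 = rl.addrevision(text, tr, linkrev, p1, p2)
    #   r2 = rl.addrevision(text, tr, linkrev, p1, p2)
    #   assert r1 == r2
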
    def addrawrevision(
        self,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        node,
        flags,
        cachedelta=None,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (ex: received
        over the wire, or read from an external bundle).
        """
        with self._writing(transaction):
            return self._addrevision(
                node,
                rawtext,
                transaction,
                link,
                p1,
                p2,
                flags,
                cachedelta,
                deltacomputer=deltacomputer,
                sidedata=sidedata,
            )

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return b'', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return b'', compressed

        if data[0:1] == b'\0':
            return b'', data
        return b'u', data

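    # A sketch of the (header, data) convention above, assuming a
    # zlib-backed compressor (``rl`` is an assumed name):
    #
    #   h, d = rl.compress(b'some fairly repetitive text ' * 64)
    #   # typically h == b'' and d starts with b'x': the zlib magic byte
    #   # doubles as the revlog chunk header
    #   h, d = rl.compress(b'!')
    #   # too small to be worth compressing: h == b'u', d is the literal data
    #
    # ``decompress()`` below dispatches on that leading byte.
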
    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == b'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(
                    _(b'revlog decompress error: %s')
                    % stringutil.forcebytestr(e)
                )
        # '\0' is more common than 'u' so it goes first.
        elif t == b'\0':
            return data
        elif t == b'u':
            return util.buffer(data, 1)

        compressor = self._get_decompressor(t)

        return compressor.decompress(data)

    def _addrevision(
        self,
        node,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        flags,
        cachedelta,
        alwayscache=False,
        deltacomputer=None,
        sidedata=None,
    ):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == self.nullid:
            raise error.RevlogError(
                _(b"%s: attempt to add null revision") % self.display_id
            )
        if (
            node == self.nodeconstants.wdirid
            or node in self.nodeconstants.wdirfilenodeids
        ):
            raise error.RevlogError(
                _(b"%s: attempt to add wdir revision") % self.display_id
            )
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)

        if self._inline:
            fh = self._writinghandles[0]
        else:
            fh = self._writinghandles[1]

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1

        offset = self._get_data_offset(prev)

        if self._concurrencychecker:
            ifh, dfh, sdfh = self._writinghandles
            # XXX no checking for the sidedata file
            if self._inline:
                # offset is "as if" it were in the .d file, so we need to add
                # on the size of the entry metadata.
                self._concurrencychecker(
                    ifh, self._indexfile, offset + curr * self.index.entry_size
                )
            else:
                # Entries in the .i are a consistent size.
                self._concurrencychecker(
                    ifh, self._indexfile, curr * self.index.entry_size
                )
                self._concurrencychecker(dfh, self._datafile, offset)

        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before it is changed by flag processors,
            # which is the non-raw size. use revlog explicitly to avoid
            # filelog's extra logic that might remove metadata size.
            textlen = mdiff.patchedsize(
                revlog.size(self, cachedelta[0]), cachedelta[1]
            )
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            write_debug = None
            if self._debug_delta:
                write_debug = transaction._report
            deltacomputer = deltautil.deltacomputer(
                self, write_debug=write_debug
            )

        if cachedelta is not None and len(cachedelta) == 2:
            # If the cached delta has no information about how it should be
            # reused, add the default reuse instruction according to the
            # revlog's configuration.
            if self._generaldelta and self._lazydeltabase:
                delta_base_reuse = DELTA_BASE_REUSE_TRY
            else:
                delta_base_reuse = DELTA_BASE_REUSE_NO
            cachedelta = (cachedelta[0], cachedelta[1], delta_base_reuse)

        revinfo = revlogutils.revisioninfo(
            node,
            p1,
            p2,
            btext,
            textlen,
            cachedelta,
            flags,
        )

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        compression_mode = COMP_MODE_INLINE
        if self._docket is not None:
            default_comp = self._docket.default_compression_header
            r = deltautil.delta_compression(default_comp, deltainfo)
            compression_mode, deltainfo = r

        sidedata_compression_mode = COMP_MODE_INLINE
        if sidedata and self.hassidedata:
            sidedata_compression_mode = COMP_MODE_PLAIN
            serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
            sidedata_offset = self._docket.sidedata_end
            h, comp_sidedata = self.compress(serialized_sidedata)
            if (
                h != b'u'
                and comp_sidedata[0:1] != b'\0'
                and len(comp_sidedata) < len(serialized_sidedata)
            ):
                assert not h
                if (
                    comp_sidedata[0:1]
                    == self._docket.default_compression_header
                ):
                    sidedata_compression_mode = COMP_MODE_DEFAULT
                    serialized_sidedata = comp_sidedata
                else:
                    sidedata_compression_mode = COMP_MODE_INLINE
                    serialized_sidedata = comp_sidedata
        else:
            serialized_sidedata = b""
            # Don't store the offset if the sidedata is empty, that way
            # we can easily detect empty sidedata and they will be no different
            # than ones we manually add.
            sidedata_offset = 0

        rank = RANK_UNKNOWN
        if self._compute_rank:
            if (p1r, p2r) == (nullrev, nullrev):
                rank = 1
            elif p1r != nullrev and p2r == nullrev:
                rank = 1 + self.fast_rank(p1r)
            elif p1r == nullrev and p2r != nullrev:
                rank = 1 + self.fast_rank(p2r)
            else:  # merge node
                if rustdagop is not None and self.index.rust_ext_compat:
                    rank = rustdagop.rank(self.index, p1r, p2r)
                else:
                    pmin, pmax = sorted((p1r, p2r))
                    rank = 1 + self.fast_rank(pmax)
                    rank += sum(1 for _ in self.findmissingrevs([pmax], [pmin]))

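        # Net effect of the branches above (as a reading aid): ``rank``
        # appears to be the size of the revision's ancestor set, the
        # revision itself included. For a root that is 1; for a linear
        # descendant, one more than its parent's rank; for a merge, the
        # size of the union of both parents' ancestor sets plus one, which
        # the fallback computes as ``fast_rank(pmax)`` plus the ancestors
        # of ``pmin`` that are missing from ``pmax``'s side.
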
        e = revlogutils.entry(
            flags=flags,
            data_offset=offset,
            data_compressed_length=deltainfo.deltalen,
            data_uncompressed_length=textlen,
            data_compression_mode=compression_mode,
            data_delta_base=deltainfo.base,
            link_rev=link,
            parent_rev_1=p1r,
            parent_rev_2=p2r,
            node_id=node,
            sidedata_offset=sidedata_offset,
            sidedata_compressed_length=len(serialized_sidedata),
            sidedata_compression_mode=sidedata_compression_mode,
            rank=rank,
        )

        self.index.append(e)
        entry = self.index.entry_binary(curr)
        if curr == 0 and self._docket is None:
            header = self._format_flags | self._format_version
            header = self.index.pack_header(header)
            entry = header + entry
        self._writeentry(
            transaction,
            entry,
            deltainfo.data,
            link,
            offset,
            serialized_sidedata,
            sidedata_offset,
        )

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes:  # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return curr

2743 def _get_data_offset(self, prev):
2744 def _get_data_offset(self, prev):
2744 """Returns the current offset in the (in-transaction) data file.
2745 """Returns the current offset in the (in-transaction) data file.
2745 Versions < 2 of the revlog can get this 0(1), revlog v2 needs a docket
2746 Versions < 2 of the revlog can get this 0(1), revlog v2 needs a docket
2746 file to store that information: since sidedata can be rewritten to the
2747 file to store that information: since sidedata can be rewritten to the
2747 end of the data file within a transaction, you can have cases where, for
2748 end of the data file within a transaction, you can have cases where, for
2748 example, rev `n` does not have sidedata while rev `n - 1` does, leading
2749 example, rev `n` does not have sidedata while rev `n - 1` does, leading
2749 to `n - 1`'s sidedata being written after `n`'s data.
2750 to `n - 1`'s sidedata being written after `n`'s data.
2750
2751
2751 TODO cache this in a docket file before getting out of experimental."""
2752 TODO cache this in a docket file before getting out of experimental."""
2752 if self._docket is None:
2753 if self._docket is None:
2753 return self.end(prev)
2754 return self.end(prev)
2754 else:
2755 else:
2755 return self._docket.data_end
2756 return self._docket.data_end
2756
2757
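    # Illustrative sketch (not from the original source; ``rl`` is an assumed
    # revlog instance): for v0/v1 revlogs the next write offset is simply the
    # end of the last revision, while a v2 revlog must consult its docket:
    #
    #   offset = rl._get_data_offset(len(rl) - 1)
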
    def _writeentry(
        self, transaction, entry, data, link, offset, sidedata, sidedata_offset
    ):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)
        ifh, dfh, sdfh = self._writinghandles
        if self._docket is None:
            ifh.seek(0, os.SEEK_END)
        else:
            ifh.seek(self._docket.index_end, os.SEEK_SET)
        if dfh:
            if self._docket is None:
                dfh.seek(0, os.SEEK_END)
            else:
                dfh.seek(self._docket.data_end, os.SEEK_SET)
        if sdfh:
            sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self._datafile, offset)
            if self._sidedatafile:
                transaction.add(self._sidedatafile, sidedata_offset)
            transaction.add(self._indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            if sidedata:
                sdfh.write(sidedata)
            ifh.write(entry)
        else:
            offset += curr * self.index.entry_size
            transaction.add(self._indexfile, offset)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            assert not sidedata
            self._enforceinlinesize(transaction)
        if self._docket is not None:
            # revlog-v2 always has 3 writing handles, help Pytype
            wh1 = self._writinghandles[0]
            wh2 = self._writinghandles[1]
            wh3 = self._writinghandles[2]
            assert wh1 is not None
            assert wh2 is not None
            assert wh3 is not None
            self._docket.index_end = wh1.tell()
            self._docket.data_end = wh2.tell()
            self._docket.sidedata_end = wh3.tell()

        nodemaputil.setup_persistent_nodemap(transaction, self)

    def addgroup(
        self,
        deltas,
        linkmapper,
        transaction,
        alwayscache=False,
        addrevisioncb=None,
        duplicaterevisioncb=None,
        debug_info=None,
        delta_base_reuse_policy=None,
    ):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the revision that was added.
        """

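        # Each item of ``deltas`` is expected to unpack as the 8-tuple
        # consumed by the loop below:
        #
        #   (node, p1, p2, linknode, deltabase, delta, flags, sidedata)
        #
        # where ``delta`` is a binary patch against ``deltabase``'s raw text.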
        if self._adding_group:
            raise error.ProgrammingError(b'cannot nest addgroup() calls')

        # read the default delta-base reuse policy from revlog config if the
        # group did not specify one.
        if delta_base_reuse_policy is None:
            if self._generaldelta and self._lazydeltabase:
                delta_base_reuse_policy = DELTA_BASE_REUSE_TRY
            else:
                delta_base_reuse_policy = DELTA_BASE_REUSE_NO

        self._adding_group = True
        empty = True
        try:
            with self._writing(transaction):
                write_debug = None
                if self._debug_delta:
                    write_debug = transaction._report
                deltacomputer = deltautil.deltacomputer(
                    self,
                    write_debug=write_debug,
                    debug_info=debug_info,
                )
                # loop through our set of deltas
                for data in deltas:
                    (
                        node,
                        p1,
                        p2,
                        linknode,
                        deltabase,
                        delta,
                        flags,
                        sidedata,
                    ) = data
                    link = linkmapper(linknode)
                    flags = flags or REVIDX_DEFAULT_FLAGS

                    rev = self.index.get_rev(node)
                    if rev is not None:
                        # this can happen if two branches make the same change
                        self._nodeduplicatecallback(transaction, rev)
                        if duplicaterevisioncb:
                            duplicaterevisioncb(self, rev)
                        empty = False
                        continue

                    for p in (p1, p2):
                        if not self.index.has_node(p):
                            raise error.LookupError(
                                p, self.radix, _(b'unknown parent')
                            )

                    if not self.index.has_node(deltabase):
                        raise error.LookupError(
                            deltabase, self.display_id, _(b'unknown delta base')
                        )

                    baserev = self.rev(deltabase)

                    if baserev != nullrev and self.iscensored(baserev):
                        # if base is censored, delta must be full replacement in a
                        # single patch operation
                        hlen = struct.calcsize(b">lll")
                        oldlen = self.rawsize(baserev)
                        newlen = len(delta) - hlen
                        if delta[:hlen] != mdiff.replacediffheader(
                            oldlen, newlen
                        ):
                            raise error.CensoredBaseError(
                                self.display_id, self.node(baserev)
                            )

                    if not flags and self._peek_iscensored(baserev, delta):
                        flags |= REVIDX_ISCENSORED

                    # We assume consumers of addrevisioncb will want to retrieve
                    # the added revision, which will require a call to
                    # revision(). revision() will fast path if there is a cache
                    # hit. So, we tell _addrevision() to always cache in this case.
                    # We're only using addgroup() in the context of changegroup
                    # generation so the revision data can always be handled as raw
                    # by the flagprocessor.
                    rev = self._addrevision(
                        node,
                        None,
                        transaction,
                        link,
                        p1,
                        p2,
                        flags,
                        (baserev, delta, delta_base_reuse_policy),
                        alwayscache=alwayscache,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

                    if addrevisioncb:
                        addrevisioncb(self, rev)
                    empty = False
        finally:
            self._adding_group = False
        return not empty

    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(
            minlink,
            len(self) - 1,
            self.headrevs(),
            self.linkrev,
            self.parentrevs,
        )

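    # Illustrative sketch (assumed names, for the example only): a caller
    # preparing to strip everything linked to changelog revision ``minlink``:
    #
    #   rev, broken = rl.getstrippoint(minlink)
    #   # ``rev`` is where truncation starts; ``broken`` holds revisions
    #   # whose linkrevs the strip would leave dangling.
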
    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        data_end = self.start(rev)
        if not self._inline:
            transaction.add(self._datafile, data_end)
            end = rev * self.index.entry_size
        else:
            end = data_end + (rev * self.index.entry_size)

        if self._sidedatafile:
            sidedata_end = self.sidedata_cut_off(rev)
            transaction.add(self._sidedatafile, sidedata_end)

        transaction.add(self._indexfile, end)
        if self._docket is not None:
            # XXX we could leverage the docket while stripping. However it is
            # not powerful enough at the time of this comment
            self._docket.index_end = end
            self._docket.data_end = data_end
            self._docket.sidedata_end = sidedata_end
            self._docket.write(transaction, stripping=True)

        # then reset internal state in memory to forget those revisions
        self._revisioncache = None
        self._chaininfocache = util.lrucachedict(500)
        self._segmentfile.clear_cache()
        self._segmentfile_sidedata.clear_cache()

        del self.index[rev:-1]

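    # Illustrative sketch (hypothetical caller; ``repo`` and the transaction
    # description are assumptions): strip is typically driven by the repair
    # machinery, roughly:
    #
    #   with repo.transaction(b'strip') as tr:
    #       rl.strip(minlink, tr)  # truncates index, data and sidedata files
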
    def checksize(self):
        """Check size of index and data files

        return a (dd, di) tuple.
        - dd: extra bytes for the "data" file
        - di: extra bytes for the "index" file

        A healthy revlog will return (0, 0).
        """
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, io.SEEK_END)
                actual = f.tell()
            dd = actual - expected
        except FileNotFoundError:
            dd = 0

        try:
            f = self.opener(self._indexfile)
            f.seek(0, io.SEEK_END)
            actual = f.tell()
            f.close()
            s = self.index.entry_size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except FileNotFoundError:
            di = 0

        return (dd, di)

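    # Illustrative note: nonzero values are corruption hints, e.g.
    #
    #   dd, di = rl.checksize()  # ``rl`` is an assumed revlog instance
    #   # dd > 0: the data file has bytes past the last indexed revision
    #   # di != 0: the index file ends in a partial entry
    #
    # verifyintegrity() below reports exactly these two conditions.
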
    def files(self):
        res = [self._indexfile]
        if self._docket_file is None:
            if not self._inline:
                res.append(self._datafile)
        else:
            res.append(self._docket_file)
            res.extend(self._docket.old_index_filepaths(include_empty=False))
            if self._docket.data_end:
                res.append(self._datafile)
            res.extend(self._docket.old_data_filepaths(include_empty=False))
            if self._docket.sidedata_end:
                res.append(self._sidedatafile)
            res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
        return res

    def emitrevisions(
        self,
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltamode=repository.CG_DELTAMODE_STD,
        sidedata_helpers=None,
        debug_info=None,
    ):
        if nodesorder not in (b'nodes', b'storage', b'linear', None):
            raise error.ProgrammingError(
                b'unhandled value for nodesorder: %s' % nodesorder
            )

        if nodesorder is None and not self._generaldelta:
            nodesorder = b'storage'

        if (
            not self._storedeltachains
            and deltamode != repository.CG_DELTAMODE_PREV
        ):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self,
            nodes,
            nodesorder,
            revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self._candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
            sidedata_helpers=sidedata_helpers,
            debug_info=debug_info,
        )

    DELTAREUSEALWAYS = b'always'
    DELTAREUSESAMEREVS = b'samerevs'
    DELTAREUSENEVER = b'never'

    DELTAREUSEFULLADD = b'fulladd'

    DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}

    def clone(
        self,
        tr,
        destrevlog,
        addrevisioncb=None,
        deltareuse=DELTAREUSESAMEREVS,
        forcedeltabothparents=None,
        sidedata_helpers=None,
    ):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is the
          fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).
        DELTAREUSEFULLADD
          Revisions will be re-added as if they were new content. This is
          slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
          e.g. large file detection and handling.

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``forcedeltabothparents``
        argument controls whether to force compute deltas against both parents
        for merges. When it is None (the default), the destination revlog's
        current setting is kept.

        See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
        `sidedata_helpers`.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(
                _(b'value for deltareuse invalid: %s') % deltareuse
            )

        if len(destrevlog):
            raise ValueError(_(b'destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_(b'source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_(b'destination revlog has filtered revisions'))

        # lazydelta and lazydeltabase control whether to reuse a cached delta,
        # if possible.
        oldlazydelta = destrevlog._lazydelta
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSENEVER:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = False

            destrevlog._deltabothparents = forcedeltabothparents or oldamd

            self._clone(
                tr,
                destrevlog,
                addrevisioncb,
                deltareuse,
                forcedeltabothparents,
                sidedata_helpers,
            )

        finally:
            destrevlog._lazydelta = oldlazydelta
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd

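    # Illustrative sketch (hypothetical names; ``tr``, ``src`` and ``dest``
    # are assumptions): forcing every delta to be recomputed while copying,
    # per the policy table in the docstring above:
    #
    #   src.clone(tr, dest, deltareuse=src.DELTAREUSENEVER)
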
    def _clone(
        self,
        tr,
        destrevlog,
        addrevisioncb,
        deltareuse,
        forcedeltabothparents,
        sidedata_helpers,
    ):
        """perform the core duty of `revlog.clone` after parameter processing"""
        write_debug = None
        if self._debug_delta:
            write_debug = tr._report
        deltacomputer = deltautil.deltacomputer(
            destrevlog,
            write_debug=write_debug,
        )
        index = self.index
        for rev in self:
            entry = index[rev]

            # Some classes override linkrev to take filtered revs into
            # account. Use raw entry from index.
            flags = entry[0] & 0xFFFF
            linkrev = entry[4]
            p1 = index[entry[5]][7]
            p2 = index[entry[6]][7]
            node = entry[7]

            # (Possibly) reuse the delta from the revlog if allowed and
            # the revlog chunk is a delta.
            cachedelta = None
            rawtext = None
            if deltareuse == self.DELTAREUSEFULLADD:
                text = self._revisiondata(rev)
                sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                destrevlog.addrevision(
                    text,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    cachedelta=cachedelta,
                    node=node,
                    flags=flags,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )
            else:
                if destrevlog._lazydelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                sidedata = None
                if not cachedelta:
                    rawtext = self._revisiondata(rev)
                    sidedata = self.sidedata(rev)
                if sidedata is None:
                    sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                with destrevlog._writing(tr):
                    destrevlog._addrevision(
                        node,
                        rawtext,
                        tr,
                        linkrev,
                        p1,
                        p2,
                        flags,
                        cachedelta,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

            if addrevisioncb:
                addrevisioncb(self, rev, node)

    def censorrevision(self, tr, censornode, tombstone=b''):
        if self._format_version == REVLOGV0:
            raise error.RevlogError(
                _(b'cannot censor with version %d revlogs')
                % self._format_version
            )
        elif self._format_version == REVLOGV1:
            rewrite.v1_censor(self, tr, censornode, tombstone)
        else:
            rewrite.v2_censor(self, tr, censornode, tombstone)

    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_(b'index contains %d extra bytes') % di)

        version = self._format_version

        # The verifier tells us what version revlog we should be.
        if version != state[b'expectedversion']:
            yield revlogproblem(
                warning=_(b"warning: '%s' uses revlog format %d; expected %d")
                % (self.display_id, version, state[b'expectedversion'])
            )

        state[b'skipread'] = set()
        state[b'safe_renamed'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta: file content starts with b'\1\n', the metadata
            #         header defined in filelog.py, but without a rename
            #   ext: content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  -------------------------------------------------------
            #  flags()              | 0      | 0      | 0     | not 0
            #  renamed()            | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n' | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "rawdata(rev)". "text"
            # mentioned below is "revision(rev)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see flagutil.addflagprocessor).
            #
            #               | common | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()     | L1     | L1     | L1    | L1
            # size()        | L1     | L2-LM  | L1(*) | L1 (?)
            # len(rawtext)  | L2     | L2     | L2    | L2
            # len(text)     | L2     | L2     | L2    | L3
            # len(read())   | L2     | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            #  1. length check: L1 == L2, in all cases.
            #  2. hash check: depending on flag processor, we may need to
            #     use either "text" (external), or "rawtext" (in revlog).

            try:
                skipflags = state.get(b'skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                _verify_revision(self, skipflags, state, node)

                l1 = self.rawsize(rev)
                l2 = len(self.rawdata(node))

                if l1 != l2:
                    yield revlogproblem(
                        error=_(b'unpacked size is %d, %d expected') % (l2, l1),
                        node=node,
                    )

            except error.CensoredNodeError:
                if state[b'erroroncensored']:
                    yield revlogproblem(
                        error=_(b'censored file data'), node=node
                    )
                state[b'skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_(b'unpacking %s: %s')
                    % (short(node), stringutil.forcebytestr(e)),
                    node=node,
                )
                state[b'skipread'].add(node)

    def storageinfo(
        self,
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
        d = {}

        if exclusivefiles:
            d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
            if not self._inline:
                d[b'exclusivefiles'].append((self.opener, self._datafile))

        if sharedfiles:
            d[b'sharedfiles'] = []

        if revisionscount:
            d[b'revisionscount'] = len(self)

        if trackedsize:
            d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d[b'storedsize'] = sum(
                self.opener.stat(path).st_size for path in self.files()
            )

        return d

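    # Illustrative note: only the requested keys appear in the result, e.g.
    # (values made up for the example)
    #
    #   rl.storageinfo(revisionscount=True, trackedsize=True)
    #   # -> {b'revisionscount': 42, b'trackedsize': 123456}
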
    def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
        if not self.hassidedata:
            return
        # revlog formats with sidedata support do not support inline
        assert not self._inline
        if not helpers[1] and not helpers[2]:
            # Nothing to generate or remove
            return

        new_entries = []
        # append the new sidedata
        with self._writing(transaction):
            ifh, dfh, sdfh = self._writinghandles
            dfh.seek(self._docket.sidedata_end, os.SEEK_SET)

            current_offset = sdfh.tell()
            for rev in range(startrev, endrev + 1):
                entry = self.index[rev]
                new_sidedata, flags = sidedatautil.run_sidedata_helpers(
                    store=self,
                    sidedata_helpers=helpers,
                    sidedata={},
                    rev=rev,
                )

                serialized_sidedata = sidedatautil.serialize_sidedata(
                    new_sidedata
                )

                sidedata_compression_mode = COMP_MODE_INLINE
                if serialized_sidedata and self.hassidedata:
                    sidedata_compression_mode = COMP_MODE_PLAIN
                    h, comp_sidedata = self.compress(serialized_sidedata)
                    if (
                        h != b'u'
                        and comp_sidedata[0:1] != b'\0'
                        and len(comp_sidedata) < len(serialized_sidedata)
                    ):
                        assert not h
                        if (
                            comp_sidedata[0:1]
                            == self._docket.default_compression_header
                        ):
                            sidedata_compression_mode = COMP_MODE_DEFAULT
                            serialized_sidedata = comp_sidedata
                        else:
                            sidedata_compression_mode = COMP_MODE_INLINE
                            serialized_sidedata = comp_sidedata
                if entry[8] != 0 or entry[9] != 0:
                    # rewriting entries that already have sidedata is not
                    # supported yet, because it introduces garbage data in the
                    # revlog.
                    msg = b"rewriting existing sidedata is not supported yet"
                    raise error.Abort(msg)

                # Apply (potential) flags to add and to remove after running
                # the sidedata helpers
                new_offset_flags = entry[0] | flags[0] & ~flags[1]
                entry_update = (
                    current_offset,
                    len(serialized_sidedata),
                    new_offset_flags,
                    sidedata_compression_mode,
                )

                # the sidedata computation might have moved the file cursors
                # around
                sdfh.seek(current_offset, os.SEEK_SET)
                sdfh.write(serialized_sidedata)
                new_entries.append(entry_update)
                current_offset += len(serialized_sidedata)
            self._docket.sidedata_end = sdfh.tell()

            # rewrite the new index entries
            ifh.seek(startrev * self.index.entry_size)
            for i, e in enumerate(new_entries):
                rev = startrev + i
                self.index.replace_sidedata_info(rev, *e)
                packed = self.index.entry_binary(rev)
                if rev == 0 and self._docket is None:
                    header = self._format_flags | self._format_version
                    header = self.index.pack_header(header)
                    packed = header + packed
                ifh.write(packed)
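
    # Illustrative note: ``helpers`` is expected to be the 3-tuple described
    # by `revlogutil.sidedata.get_sidedata_helpers` (repository, computers,
    # removers); the early return above fires when both the computer and
    # remover slots are empty.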